ArmNN
 22.05.01
armnn Namespace Reference

Copyright (c) 2021 ARM Limited and Contributors. More...

Namespaces

 experimental
 
 gatordmock
 
 optimizations
 
 profiling
 
 stringUtils
 
 test
 
 timelinedecoder
 
 utility
 

Classes

struct  abs
 
class  AbsLayer
 
struct  AbsQueueDescriptor
 
struct  ActivationDescriptor
 An ActivationDescriptor for the ActivationLayer. More...
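For illustration, a bounded ReLU activation could be configured roughly as follows (a minimal sketch; the member names m_Function, m_A and m_B follow the public ArmNN headers, and the network pointer is assumed to exist):

    armnn::ActivationDescriptor activationDesc;
    activationDesc.m_Function = armnn::ActivationFunction::BoundedReLu;
    activationDesc.m_A = 6.0f; // upper bound used by BoundedReLu
    activationDesc.m_B = 0.0f; // lower bound used by BoundedReLu
    armnn::IConnectableLayer* activation = network->AddActivationLayer(activationDesc, "relu6");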
 
class  ActivationLayer
 This layer represents an activation operation with the specified activation function. More...
 
struct  ActivationQueueDescriptor
 
class  AddedLayerObservable
 
class  AdditionLayer
 This layer represents an addition operation. More...
 
struct  AdditionQueueDescriptor
 
struct  Allocator
 
struct  ArgMinMaxDescriptor
 An ArgMinMaxDescriptor for ArgMinMaxLayer. More...
 
class  ArgMinMaxLayer
 This layer represents an ArgMinMax operation. More...
 
struct  ArgMinMaxQueueDescriptor
 
class  ArmNNProfilingServiceInitialiser
 
class  BackendCapabilityException
 
class  BackendId
 
struct  BackendOptions
 Struct for the users to pass backend specific options. More...
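As a rough sketch, backend-specific options can be grouped per backend and passed on via ModelOptions or NetworkOptions (the option names shown are illustrative and depend on the backend):

    armnn::BackendOptions gpuAccOptions("GpuAcc",
    {
        { "FastMathEnabled", true }, // illustrative option name
        { "TuningLevel", 2 }         // illustrative option name
    });
    armnn::ModelOptions modelOptions = { gpuAccOptions };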
 
class  BackendRegistry
 
struct  BackendSettings
 
class  BackendUnavailableException
 Class for non-fatal exceptions raised while initialising a backend. More...
 
struct  BackendVersion
 
class  BadOptionalAccessException
 
struct  BaseDescriptor
 Base class for all descriptors. More...
 
class  BaseIterator
 
class  BaseMemoryManager
 
class  BaseTensor
 
class  BaseWorkload
 
struct  BatchNormalizationDescriptor
 A BatchNormalizationDescriptor for the BatchNormalizationLayer. More...
 
class  BatchNormalizationLayer
 This layer represents a batch normalization operation. More...
 
struct  BatchNormalizationQueueDescriptor
 
struct  BatchToSpaceNdDescriptor
 A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer. More...
 
class  BatchToSpaceNdLayer
 This layer represents a BatchToSpaceNd operation. More...
 
struct  BatchToSpaceNdQueueDescriptor
 
class  BFloat16
 
class  BFloat16Decoder
 
class  BFloat16Encoder
 
struct  BiasAndWeightsTypesCompatible
 
struct  BiasAndWeightsTypesMatch
 
class  BindableLayer
 
class  BooleanDecoder
 
class  BooleanDecoderBool
 
class  BooleanEncoder
 
struct  BroadcastLoop
 
struct  BufferStorage
 
struct  Capability
 Capability of the TensorHandleFactory. More...
 
class  CastLayer
 This layer represents a cast operation. More...
 
struct  CastQueueDescriptor
 
struct  ChannelShuffleDescriptor
 A ChannelShuffleDescriptor for the ChannelShuffle operator. More...
 
class  ChannelShuffleLayer
 
struct  ChannelShuffleQueueDescriptor
 
struct  CheckLocation
 
class  ClAbsWorkload
 
class  ClActivationWorkload
 
class  ClAdditionWorkload
 
class  ClArgMinMaxWorkload
 
class  ClBackend
 
class  ClBackendContext
 
class  ClBackendDefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  ClBackendModelContext
 The ClBackendModelContext is used to pass in CL specific backend ModelOptions. More...
 
class  ClBaseWorkload
 
class  ClBatchNormalizationFloatWorkload
 
class  ClBatchToSpaceNdWorkload
 
class  ClCastWorkload
 
class  ClChannelShuffleWorkload
 
class  ClComparisonWorkload
 
class  ClConcatWorkload
 
class  ClConstantWorkload
 
struct  ClContextBuilder
 
class  ClContextControl
 
class  ClContextDeserializer
 
class  ClContextSerializer
 
class  ClConvertFp16ToFp32Workload
 
class  ClConvertFp32ToFp16Workload
 
class  ClConvolution2dWorkload
 
class  ClConvolution3dWorkload
 
class  ClDepthToSpaceWorkload
 
class  ClDepthwiseConvolutionWorkload
 
class  ClDequantizeWorkload
 
class  ClDivisionWorkload
 
class  ClExpWorkload
 
class  ClFillWorkload
 
class  ClFloorFloatWorkload
 
class  ClFullyConnectedWorkload
 
class  ClGatherNdWorkload
 
class  ClGatherWorkload
 
class  ClImportSubTensorHandle
 
class  ClImportTensorHandle
 
class  ClImportTensorHandleFactory
 This factory creates ClImportTensorHandles that refer to imported memory tensors. More...
 
class  ClInstanceNormalizationWorkload
 
class  ClL2NormalizationFloatWorkload
 
class  ClLayerSupport
 
class  ClLogicalAndWorkload
 
class  ClLogicalNotWorkload
 
class  ClLogicalOrWorkload
 
class  ClLogSoftmaxWorkload
 
class  ClLogWorkload
 
class  ClLstmFloatWorkload
 
class  ClMaximumWorkload
 
class  ClMeanWorkload
 
class  ClMemoryManager
 
class  ClMinimumWorkload
 
class  ClMultiplicationWorkload
 
class  ClNegWorkload
 
class  ClNormalizationFloatWorkload
 
class  ClPadWorkload
 
class  ClPermuteWorkload
 
class  ClPooling2dWorkload
 
class  ClPooling3dWorkload
 
class  ClPreluWorkload
 
class  ClQLstmWorkload
 
class  ClQuantizedLstmWorkload
 
class  ClQuantizeWorkload
 
struct  ClRankWorkload
 
class  ClReduceWorkload
 
class  ClReshapeWorkload
 
class  ClResizeWorkload
 
class  ClRsqrtWorkload
 
class  ClRuntimeUnavailableException
 
class  ClSinWorkload
 
class  ClSliceWorkload
 
class  ClSoftmaxWorkload
 
class  ClSpaceToBatchNdWorkload
 
class  ClSpaceToDepthWorkload
 
class  ClSplitterWorkload
 
class  ClSqrtWorkload
 
class  ClStackWorkload
 
class  ClStridedSliceWorkload
 
class  ClSubTensorHandle
 
class  ClSubtractionWorkload
 
class  ClTensorHandle
 
class  ClTensorHandleFactory
 
class  ClTransposeConvolution2dWorkload
 
class  ClTransposeWorkload
 
class  ClTunedParameters
 
class  ClUnidirectionalSequenceLstmFloatWorkload
 
class  ClWorkloadFactory
 
struct  ComparisonDescriptor
 A ComparisonDescriptor for the ComparisonLayer. More...
 
class  ComparisonLayer
 This layer represents a comparison operation. More...
 
struct  ComparisonQueueDescriptor
 
class  ConcatLayer
 This layer represents a concatenation (merge) operation. More...
 
struct  ConcatQueueDescriptor
 
class  ConstantLayer
 A layer that constant data can be bound to. More...
 
class  ConstantMemoryStrategy
 
struct  ConstantQueueDescriptor
 
class  ConstPassthroughTensorHandle
 
struct  ConstructInPlace
 Disambiguation tag that can be passed to the constructor to indicate that the contained object should be constructed in-place. More...
 
class  ConstTensor
 A tensor defined by a TensorInfo (shape and data type) and an immutable backing store. More...
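A minimal sketch of wrapping existing data in a ConstTensor; note that recent releases expect the TensorInfo to be flagged as constant first:

    std::vector<float> weightsData = { 1.0f, 2.0f, 3.0f, 4.0f };
    armnn::TensorInfo weightsInfo({ 2, 2 }, armnn::DataType::Float32);
    weightsInfo.SetConstant(true);
    armnn::ConstTensor weights(weightsInfo, weightsData);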
 
class  ConstTensorHandle
 
class  ConvertBf16ToFp32Layer
 This layer converts data type BFloat16 to Float32. More...
 
struct  ConvertBf16ToFp32QueueDescriptor
 
class  ConvertFp16ToFp32Layer
 This layer converts data type Float16 to Float32. More...
 
struct  ConvertFp16ToFp32QueueDescriptor
 
class  ConvertFp32ToBf16Layer
 This layer converts data type Float32 to BFloat16. More...
 
struct  ConvertFp32ToBf16QueueDescriptor
 
class  ConvertFp32ToFp16Layer
 This layer converts data type Float32 to Float16. More...
 
struct  ConvertFp32ToFp16QueueDescriptor
 
struct  Convolution2dDescriptor
 A Convolution2dDescriptor for the Convolution2dLayer. More...
 
class  Convolution2dLayer
 This layer represents a convolution 2d operation. More...
 
struct  Convolution2dQueueDescriptor
 
struct  Convolution3dDescriptor
 A Convolution3dDescriptor for the Convolution3dLayer. More...
 
class  Convolution3dLayer
 This layer represents a convolution 3d operation. More...
 
struct  Convolution3dQueueDescriptor
 
class  CopyMemGenericWorkload
 
class  DebugLayer
 This layer visualizes the data flowing through the network. More...
 
struct  DebugQueueDescriptor
 
class  Decoder
 
class  DefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  DepthToSpaceLayer
 This layer represents a DepthToSpace operation. More...
 
struct  DepthToSpaceQueueDescriptor
 
struct  DepthwiseConvolution2dDescriptor
 A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer. More...
 
class  DepthwiseConvolution2dLayer
 This layer represents a depthwise convolution 2d operation. More...
 
struct  DepthwiseConvolution2dQueueDescriptor
 Depthwise Convolution 2D layer workload data. More...
 
class  DequantizeLayer
 This layer dequantizes the input tensor. More...
 
struct  DequantizeQueueDescriptor
 
struct  DetectionPostProcessDescriptor
 
class  DetectionPostProcessLayer
 This layer represents a detection postprocess operator. More...
 
struct  DetectionPostProcessQueueDescriptor
 
class  DeviceSpec
 
class  DivisionLayer
 This layer represents a division operation. More...
 
struct  DivisionQueueDescriptor
 
class  DotAttributeSet
 
class  DotBase
 
class  DotDefaults
 
class  DotEdge
 
class  DotGraph
 
class  DotNode
 
class  DynamicBackend
 
class  DynamicBackendUtils
 
class  ElementwiseBaseLayer
 NOTE: This is an abstract class that encapsulates the elementwise operations; it does not implement: std::unique_ptr<IWorkload> Layer::CreateWorkload(const IWorkloadFactory& factory) const = 0; Layer* Clone(Graph& graph) const = 0. More...
 
struct  ElementwiseBinaryFunction
 
struct  ElementwiseUnaryDescriptor
 An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer. More...
 
struct  ElementwiseUnaryFunction
 
class  ElementwiseUnaryLayer
 This layer represents an elementwise unary operation. More...
 
struct  ElementwiseUnaryQueueDescriptor
 
struct  EmptyOptional
 EmptyOptional is used to initialize the Optional class in case we want to have default value for an Optional in a function declaration. More...
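A brief sketch of the idiom (the Configure function below is hypothetical, for illustration only):

    armnn::Optional<float> scale;              // constructed empty
    armnn::Optional<float> definedScale(0.5f); // holds a value
    // Hypothetical declaration using EmptyOptional as the default argument:
    void Configure(const armnn::Optional<float>& scale = armnn::EmptyOptional());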
 
class  Encoder
 
struct  EqualQueueDescriptor
 
class  ErasedLayerNamesObservable
 
class  Event
 Event class records measurements reported by BeginEvent()/EndEvent() and returns measurements when Event::GetMeasurements() is called. More...
 
class  Exception
 Base class for all ArmNN exceptions so that users can filter to just those. More...
 
class  ExecutionFrame
 
struct  exp
 
struct  FakeQuantizationDescriptor
 A FakeQuantizationDescriptor for the FakeQuantizationLayer. More...
 
class  FakeQuantizationLayer
 This layer represents a fake quantization operation. More...
 
struct  FakeQuantizationQueueDescriptor
 
class  FileNotFoundException
 
struct  FillDescriptor
 A FillDescriptor for the FillLayer. More...
 
class  FillLayer
 This layer represents a fill operation. More...
 
struct  FillQueueDescriptor
 
class  FirstInputTypedWorkload
 
struct  FLATBUFFERS_FINAL_CLASS
 
class  Float16Decoder
 
class  Float16Encoder
 
class  Float32Decoder
 
class  Float32Encoder
 
class  FloorLayer
 This layer represents a floor operation. More...
 
struct  FloorQueueDescriptor
 
struct  FullyConnectedDescriptor
 A FullyConnectedDescriptor for the FullyConnectedLayer. More...
 
class  FullyConnectedLayer
 This layer represents a fully connected operation. More...
 
struct  FullyConnectedQueueDescriptor
 
struct  GatherDescriptor
 A GatherDescriptor for the GatherLayer. More...
 
class  GatherLayer
 This layer represents a Gather operator. More...
 
class  GatherNdLayer
 This layer represents a GatherNd operator. More...
 
struct  GatherNdQueueDescriptor
 
struct  GatherQueueDescriptor
 
class  Graph
 
class  GraphObservable
 
class  GraphValidationException
 
struct  GreaterQueueDescriptor
 
class  HtmlBold
 
class  HtmlFont
 
class  HtmlSection
 
class  HtmlSimpleTag
 
class  IAclTensorHandle
 
class  IBackend
 Each backend should implement an IBackend. More...
 
class  IBackendContext
 
class  IBackendInternal
 
class  IBackendModelContext
 
class  IClTensorHandle
 
class  ICLTensorProxy
 
class  IConnectableLayer
 Interface for a layer that is connectable to other layers via InputSlots and OutputSlots. More...
 
class  ICustomAllocator
 Custom Allocator interface. More...
 
class  IDeviceSpec
 Device specific knowledge to be passed to the optimizer. More...
 
class  IExecutionFrame
 ExecutionFrame interface to enqueue a workload computation. More...
 
class  IGpuAccTunedParameters
 Manages a set of GpuAcc parameters which have been tuned for maximum performance. More...
 
class  IGraphObservable
 
class  IInputSlot
 An input connection slot for a layer. More...
 
class  ILayerSupport
 
class  IMemoryManager
 
class  IMemoryOptimizerStrategy
 
struct  IMemoryOptimizerStrategyFactory
 
class  ImportMemGenericWorkload
 
class  INetwork
 Main network class which provides the interface for building up a neural network. More...
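A minimal sketch of building a small network through this interface (assuming the usual public headers, e.g. armnn/ArmNN.hpp, are included):

    armnn::INetworkPtr network = armnn::INetwork::Create();
    armnn::IConnectableLayer* input   = network->AddInputLayer(0);
    armnn::IConnectableLayer* softmax = network->AddSoftmaxLayer(armnn::SoftmaxDescriptor(), "softmax");
    armnn::IConnectableLayer* output  = network->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
    softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    armnn::TensorInfo tensorInfo({ 1, 10 }, armnn::DataType::Float32);
    input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    softmax->GetOutputSlot(0).SetTensorInfo(tensorInfo);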
 
struct  INetworkProperties
 
class  InputLayer
 A layer user-provided data can be bound to (e.g. inputs, outputs). More...
 
class  InputSlot
 
struct  InstanceNormalizationDescriptor
 An InstanceNormalizationDescriptor for InstanceNormalizationLayer. More...
 
class  InstanceNormalizationLayer
 This layer represents an instance normalization operation. More...
 
struct  InstanceNormalizationQueueDescriptor
 
class  Instrument
 
class  Int32Decoder
 
class  Int32Encoder
 
class  Int32ToInt32tDecoder
 
class  Int32ToInt32tEncoder
 
class  InvalidArgumentException
 
class  IOptimizedNetwork
 
class  IOutputSlot
 An output connection slot for a layer. More...
 
class  IProfiler
 
class  IRuntime
 
struct  IsHalfType
 
struct  IsMemorySource
 
struct  IsMemorySource< MemorySource >
 
class  IStrategy
 
class  ISubgraphViewConverter
 
class  ITensorHandle
 
class  ITensorHandleFactory
 
class  IWorkload
 Workload interface to enqueue a layer computation. More...
 
class  IWorkloadFactory
 
struct  JsonChildObject
 
class  JsonPrinter
 
class  JsonUtils
 
struct  L2NormalizationDescriptor
 An L2NormalizationDescriptor for the L2NormalizationLayer. More...
 
class  L2NormalizationLayer
 This layer represents an L2 normalization operation. More...
 
struct  L2NormalizationQueueDescriptor
 
class  Layer
 
class  LayerSupportBase
 
class  LayerSupportHandle
 
struct  LayerTypeOfImpl
 
struct  LayerTypeOfImpl< LayerType::Activation >
 
struct  LayerTypeOfImpl< LayerType::Addition >
 
struct  LayerTypeOfImpl< LayerType::ArgMinMax >
 
struct  LayerTypeOfImpl< LayerType::BatchNormalization >
 
struct  LayerTypeOfImpl< LayerType::BatchToSpaceNd >
 
struct  LayerTypeOfImpl< LayerType::Cast >
 
struct  LayerTypeOfImpl< LayerType::ChannelShuffle >
 
struct  LayerTypeOfImpl< LayerType::Comparison >
 
struct  LayerTypeOfImpl< LayerType::Concat >
 
struct  LayerTypeOfImpl< LayerType::Constant >
 
struct  LayerTypeOfImpl< LayerType::ConvertBf16ToFp32 >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp16ToFp32 >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp32ToBf16 >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp32ToFp16 >
 
struct  LayerTypeOfImpl< LayerType::Convolution2d >
 
struct  LayerTypeOfImpl< LayerType::Convolution3d >
 
struct  LayerTypeOfImpl< LayerType::Debug >
 
struct  LayerTypeOfImpl< LayerType::DepthToSpace >
 
struct  LayerTypeOfImpl< LayerType::DepthwiseConvolution2d >
 
struct  LayerTypeOfImpl< LayerType::Dequantize >
 
struct  LayerTypeOfImpl< LayerType::DetectionPostProcess >
 
struct  LayerTypeOfImpl< LayerType::Division >
 
struct  LayerTypeOfImpl< LayerType::ElementwiseUnary >
 
struct  LayerTypeOfImpl< LayerType::FakeQuantization >
 
struct  LayerTypeOfImpl< LayerType::Fill >
 
struct  LayerTypeOfImpl< LayerType::Floor >
 
struct  LayerTypeOfImpl< LayerType::FullyConnected >
 
struct  LayerTypeOfImpl< LayerType::Gather >
 
struct  LayerTypeOfImpl< LayerType::GatherNd >
 
struct  LayerTypeOfImpl< LayerType::Input >
 
struct  LayerTypeOfImpl< LayerType::InstanceNormalization >
 
struct  LayerTypeOfImpl< LayerType::L2Normalization >
 
struct  LayerTypeOfImpl< LayerType::LogicalBinary >
 
struct  LayerTypeOfImpl< LayerType::LogSoftmax >
 
struct  LayerTypeOfImpl< LayerType::Lstm >
 
struct  LayerTypeOfImpl< LayerType::Map >
 
struct  LayerTypeOfImpl< LayerType::Maximum >
 
struct  LayerTypeOfImpl< LayerType::Mean >
 
struct  LayerTypeOfImpl< LayerType::MemCopy >
 
struct  LayerTypeOfImpl< LayerType::MemImport >
 
struct  LayerTypeOfImpl< LayerType::Merge >
 
struct  LayerTypeOfImpl< LayerType::Minimum >
 
struct  LayerTypeOfImpl< LayerType::Multiplication >
 
struct  LayerTypeOfImpl< LayerType::Normalization >
 
struct  LayerTypeOfImpl< LayerType::Output >
 
struct  LayerTypeOfImpl< LayerType::Pad >
 
struct  LayerTypeOfImpl< LayerType::Permute >
 
struct  LayerTypeOfImpl< LayerType::Pooling2d >
 
struct  LayerTypeOfImpl< LayerType::Pooling3d >
 
struct  LayerTypeOfImpl< LayerType::PreCompiled >
 
struct  LayerTypeOfImpl< LayerType::Prelu >
 
struct  LayerTypeOfImpl< LayerType::QLstm >
 
struct  LayerTypeOfImpl< LayerType::Quantize >
 
struct  LayerTypeOfImpl< LayerType::QuantizedLstm >
 
struct  LayerTypeOfImpl< LayerType::Rank >
 
struct  LayerTypeOfImpl< LayerType::Reduce >
 
struct  LayerTypeOfImpl< LayerType::Reshape >
 
struct  LayerTypeOfImpl< LayerType::Resize >
 
struct  LayerTypeOfImpl< LayerType::Shape >
 
struct  LayerTypeOfImpl< LayerType::Slice >
 
struct  LayerTypeOfImpl< LayerType::Softmax >
 
struct  LayerTypeOfImpl< LayerType::SpaceToBatchNd >
 
struct  LayerTypeOfImpl< LayerType::SpaceToDepth >
 
struct  LayerTypeOfImpl< LayerType::Splitter >
 
struct  LayerTypeOfImpl< LayerType::Stack >
 
struct  LayerTypeOfImpl< LayerType::StandIn >
 
struct  LayerTypeOfImpl< LayerType::StridedSlice >
 
struct  LayerTypeOfImpl< LayerType::Subtraction >
 
struct  LayerTypeOfImpl< LayerType::Switch >
 
struct  LayerTypeOfImpl< LayerType::Transpose >
 
struct  LayerTypeOfImpl< LayerType::TransposeConvolution2d >
 
struct  LayerTypeOfImpl< LayerType::UnidirectionalSequenceLstm >
 
struct  LayerTypeOfImpl< LayerType::Unmap >
 
class  LayerValidationException
 
class  LayerVisitorBase
 Visitor base class with empty implementations. More...
 
class  LayerWithParameters
 
class  LoadedNetwork
 
struct  log
 
struct  LogicalBinaryDescriptor
 A LogicalBinaryDescriptor for the LogicalBinaryLayer. More...
 
struct  LogicalBinaryFunction
 
class  LogicalBinaryLayer
 This layer represents a Logical Binary operation. More...
 
struct  LogicalBinaryQueueDescriptor
 
struct  LogicalUnaryFunction
 
class  LogSink
 
class  LogSoftmaxLayer
 This layer represents a log softmax operation. More...
 
struct  LogSoftmaxQueueDescriptor
 
struct  LstmBasicParameters
 
struct  LstmDescriptor
 An LstmDescriptor for the LstmLayer. More...
 
struct  LstmInputParams
 
struct  LstmInputParamsInfo
 
class  LstmLayer
 This layer represents an LSTM operation. More...
 
struct  LstmOptCifgParameters
 
struct  LstmOptLayerNormParameters
 
struct  LstmOptPeepholeParameters
 
struct  LstmOptProjectionParameters
 
struct  LstmQueueDescriptor
 
class  LstmVisitor
 
class  ManagedConstTensorHandle
 
class  MapLayer
 This layer represents a memory map operation. More...
 
struct  MapQueueDescriptor
 
class  MapWorkload
 
struct  maximum
 
class  MaximumLayer
 This layer represents a maximum operation. More...
 
struct  MaximumQueueDescriptor
 
struct  MeanDescriptor
 A MeanDescriptor for the MeanLayer. More...
 
class  MeanLayer
 This layer represents a mean operation. More...
 
struct  MeanQueueDescriptor
 
struct  Measurement
 
struct  MemBin
 
struct  MemBlock
 
class  MemCopyLayer
 This layer represents a memory copy operation. More...
 
struct  MemCopyQueueDescriptor
 
class  MemImportLayer
 This layer represents a memory import operation. More...
 
struct  MemImportQueueDescriptor
 
class  MemoryExportException
 
class  MemoryImportException
 
class  MemoryManager
 
class  MemoryValidationException
 
struct  MemSyncQueueDescriptor
 
class  MergeLayer
 This layer represents a merge operation. More...
 
struct  MergeQueueDescriptor
 
struct  minimum
 
class  MinimumLayer
 This layer represents a minimum operation. More...
 
struct  MinimumQueueDescriptor
 
class  MockBackend
 
class  MockBackendInitialiser
 
class  MockBackendProfilingContext
 
class  MockBackendProfilingService
 
class  MockImportBackend
 
class  MockImportBackendInitialiser
 
class  MockImportLayerSupport
 
class  MockLayerSupport
 
class  MockMemoryManager
 
class  MockTensorHandle
 
class  MockTensorHandleFactory
 
class  MockWorkloadFactory
 
class  MultiplicationLayer
 This layer represents a multiplication operation. More...
 
struct  MultiplicationQueueDescriptor
 
class  MultiTypedWorkload
 
class  NeonAbsWorkload
 
class  NeonActivationWorkload
 
class  NeonAdditionWorkload
 
class  NeonArgMinMaxWorkload
 
class  NeonBackend
 
class  NeonBackendModelContext
 The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions. More...
 
class  NeonBaseWorkload
 
class  NeonBatchNormalizationWorkload
 
class  NeonBatchToSpaceNdWorkload
 
class  NeonCastWorkload
 
class  NeonChannelShuffleWorkload
 
class  NeonComparisonWorkload
 
class  NeonConcatWorkload
 
class  NeonConstantWorkload
 
class  NeonConvertBf16ToFp32Workload
 
class  NeonConvertFp16ToFp32Workload
 
class  NeonConvertFp32ToBf16Workload
 
class  NeonConvertFp32ToFp16Workload
 
class  NeonConvolution2dWorkload
 
class  NeonConvolution3dWorkload
 
class  NeonDepthToSpaceWorkload
 
class  NeonDepthwiseConvolutionWorkload
 
class  NeonDequantizeWorkload
 
class  NeonDetectionPostProcessWorkload
 
class  NeonDivisionWorkload
 
class  NeonExpWorkload
 
class  NeonFillWorkload
 
class  NeonFloorFloatWorkload
 
class  NeonFullyConnectedWorkload
 
class  NeonGatherNdWorkload
 
class  NeonGatherWorkload
 
class  NeonInstanceNormalizationWorkload
 
class  NeonInterceptorScheduler
 
class  NeonL2NormalizationFloatWorkload
 
class  NeonLayerSupport
 
class  NeonLogicalAndWorkload
 
class  NeonLogicalNotWorkload
 
class  NeonLogicalOrWorkload
 
class  NeonLogSoftmaxWorkload
 
class  NeonLogWorkload
 
class  NeonLstmFloatWorkload
 
class  NeonMaximumWorkload
 
class  NeonMeanWorkload
 
class  NeonMemoryManager
 
class  NeonMinimumWorkload
 
class  NeonMultiplicationWorkload
 
class  NeonNegWorkload
 
class  NeonNormalizationFloatWorkload
 
class  NeonPadWorkload
 
class  NeonPermuteWorkload
 
class  NeonPooling2dWorkload
 
class  NeonPooling3dWorkload
 
class  NeonPreluWorkload
 
class  NeonQLstmWorkload
 
class  NeonQuantizedLstmWorkload
 
class  NeonQuantizeWorkload
 
struct  NeonRankWorkload
 
class  NeonReduceWorkload
 
class  NeonReshapeWorkload
 
class  NeonResizeWorkload
 
class  NeonRsqrtWorkload
 
class  NeonSinWorkload
 
class  NeonSliceWorkload
 
class  NeonSoftmaxWorkload
 
class  NeonSpaceToBatchNdWorkload
 
class  NeonSpaceToDepthWorkload
 
class  NeonSplitterWorkload
 
class  NeonSqrtWorkload
 
class  NeonStackWorkload
 
class  NeonStridedSliceWorkload
 
class  NeonSubTensorHandle
 
class  NeonSubtractionWorkload
 
class  NeonTensorHandle
 
class  NeonTensorHandleFactory
 
class  NeonTimer
 
class  NeonTransposeConvolution2dWorkload
 
class  NeonTransposeWorkload
 
class  NeonUnidirectionalSequenceLstmFloatWorkload
 
class  NeonUnidirectionalSequenceLstmWorkload
 
class  NeonWorkloadFactory
 
class  NetworkImpl
 Private implementation of INetwork. More...
 
class  NodeContent
 
struct  NormalizationDescriptor
 A NormalizationDescriptor for the NormalizationLayer. More...
 
class  NormalizationLayer
 This layer represents a normalization operation. More...
 
struct  NormalizationQueueDescriptor
 
struct  NoThrowStrategy
 
struct  NullDescriptor
 Null Descriptor used as a return value from the IConnectableLayer GetParameters method by layers which do not have a descriptor. More...
 
class  NullPointerException
 
class  NullWorkload
 
class  OpenClTimer
 OpenClTimer instrument that times all OpenCl kernels executed between calls to Start() and Stop(). More...
 
class  Optimization
 
struct  OptimizationResult
 
class  OptimizationViews
 
class  OptimizedNetworkImpl
 
class  OptimizeForConnection
 
class  OptimizeForConnectionImpl
 Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...
 
class  OptimizeForExclusiveConnection
 
class  OptimizeForExclusiveConnectionImpl
 Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...
 
class  OptimizeForType
 
class  OptimizeForTypeImpl
 Wrapper Optimization base class that calls Wrapped::Run() for every layer of type BaseType. More...
 
class  OptimizeForTypeImpl< Layer, Wrapped >
 Specialization that calls Wrapped::Run() for any layer type. More...
 
class  Optimizer
 
struct  OptimizerOptions
 ArmNN performs an optimization on each model/network before it gets loaded for execution. More...
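For example, FP16 reduction and full shape inference might be requested roughly as follows (the member names are assumptions based on the 22.05 headers):

    armnn::OptimizerOptions optimizerOptions;
    optimizerOptions.m_ReduceFp32ToFp16 = true;  // assumed member name
    optimizerOptions.m_Debug = false;            // assumed member name
    optimizerOptions.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::InferAndValidate; // assumed member name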
 
class  Optional
 
class  OptionalBase
 OptionalBase is the common functionality between reference and non-reference optional types. More...
 
class  OptionalReferenceSwitch
 The default implementation is the non-reference case. More...
 
class  OptionalReferenceSwitch< true, T >
 This is the special case for reference types. More...
 
struct  OriginsDescriptor
 An OriginsDescriptor for the ConcatLayer. More...
 
class  OutputHandler
 
class  OutputLayer
 A layer user-provided data can be bound to (e.g. inputs, outputs). More...
 
class  OutputSlot
 
struct  PadDescriptor
 A PadDescriptor for the PadLayer. More...
 
class  PadLayer
 This layer represents a pad operation. More...
 
struct  PadQueueDescriptor
 
class  ParseException
 
class  PassthroughTensorHandle
 
class  PerAxisIterator
 PerAxisIterator for per-axis quantization. More...
 
class  PermutationVector
 
struct  PermuteDescriptor
 A PermuteDescriptor for the PermuteLayer. More...
 
class  PermuteLayer
 This layer represents a permutation operation. More...
 
struct  PermuteQueueDescriptor
 
class  PolymorphicDowncastException
 
struct  Pooling2dDescriptor
 A Pooling2dDescriptor for the Pooling2dLayer. More...
 
class  Pooling2dLayer
 This layer represents a pooling 2d operation. More...
 
struct  Pooling2dQueueDescriptor
 
struct  Pooling3dDescriptor
 A Pooling3dDescriptor for the Pooling3dLayer. More...
 
class  Pooling3dLayer
 This layer represents a pooling 3d operation. More...
 
struct  Pooling3dQueueDescriptor
 
struct  PreCompiledDescriptor
 A PreCompiledDescriptor for the PreCompiledLayer. More...
 
class  PreCompiledLayer
 
struct  PreCompiledQueueDescriptor
 
class  PredicateResult
 
class  PreluLayer
 
struct  PreluQueueDescriptor
 
class  ProfilerImpl
 
class  ProfilerManager
 
class  ProfilingDetails
 ProfilingDetails class records any details associated with the operator and passes them on for output to the user. More...
 
struct  ProgramBuilder
 
class  QASymm8Decoder
 
class  QASymm8Encoder
 
class  QASymmS8Decoder
 
class  QASymmS8Encoder
 
struct  QLstmBasicParameters
 
struct  QLstmDescriptor
 A QLstmDescriptor for the QLstmLayer. More...
 
class  QLstmLayer
 This layer represents a QLstm operation. More...
 
struct  QLstmOptCifgParameters
 
struct  QLstmOptLayerNormParameters
 
struct  QLstmOptPeepholeParameters
 
struct  QLstmOptProjectionParameters
 
struct  QLstmQueueDescriptor
 
class  QSymm16Decoder
 
class  QSymm16Encoder
 
class  QSymm8PerAxisDecoder
 
class  QSymm8PerAxisEncoder
 
class  QSymmS8Decoder
 
class  QSymmS8Encoder
 
struct  QuantizationParametersAreEqual
 
struct  QuantizedLstmInputParams
 
struct  QuantizedLstmInputParamsInfo
 
class  QuantizedLstmLayer
 This layer represents a QuantizedLstm operation. More...
 
struct  QuantizedLstmParameters
 
struct  QuantizedLstmQueueDescriptor
 
struct  QuantizedMultiplierSmallerThanOne
 Performs multiplication of an integer with a multiplier which is less than one, using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. More...
 
class  QuantizeLayer
 
struct  QuantizeQueueDescriptor
 
struct  QueueDescriptor
 
struct  QueueDescriptorWithParameters
 
class  RangeTracker
 
class  RankLayer
 
struct  RankQueueDescriptor
 
struct  ReduceDescriptor
 A ReduceDescriptor for the REDUCE operators. More...
 
class  ReduceLayer
 This layer represents a reduction operation. More...
 
struct  ReduceQueueDescriptor
 
class  RefActivationWorkload
 
class  RefArgMinMaxWorkload
 
class  RefBackend
 
class  RefBaseWorkload
 
class  RefBatchNormalizationWorkload
 
class  RefBatchToSpaceNdWorkload
 
class  RefCastWorkload
 
class  RefChannelShuffleWorkload
 
class  RefComparisonWorkload
 
class  RefConcatWorkload
 
class  RefConstantWorkload
 
class  RefConvertBf16ToFp32Workload
 
class  RefConvertFp16ToFp32Workload
 
class  RefConvertFp32ToBf16Workload
 
class  RefConvertFp32ToFp16Workload
 
class  RefConvolution2dWorkload
 
class  RefConvolution3dWorkload
 
class  RefDebugWorkload
 
class  RefDepthToSpaceWorkload
 
class  RefDepthwiseConvolution2dWorkload
 
class  RefDequantizeWorkload
 
class  RefDetectionPostProcessWorkload
 
class  RefElementwiseUnaryWorkload
 
class  RefElementwiseWorkload
 
class  RefFakeQuantizationFloat32Workload
 
class  RefFillWorkload
 
class  RefFloorWorkload
 
class  RefFullyConnectedWorkload
 
class  RefGatherNdWorkload
 
class  RefGatherWorkload
 
class  RefInstanceNormalizationWorkload
 
class  RefL2NormalizationWorkload
 
class  RefLayerSupport
 
class  RefLogicalBinaryWorkload
 
class  RefLogicalUnaryWorkload
 
class  RefLogSoftmaxWorkload
 
class  RefLstmWorkload
 
class  RefMeanWorkload
 
class  RefMemoryManager
 
class  RefNormalizationWorkload
 
class  RefPadWorkload
 
class  RefPermuteWorkload
 
class  RefPooling2dWorkload
 
class  RefPooling3dWorkload
 
class  RefPreluWorkload
 
class  RefQLstmWorkload
 
class  RefQuantizeWorkload
 
struct  RefRankWorkload
 
class  RefReduceWorkload
 
class  RefReshapeWorkload
 
class  RefResizeWorkload
 
struct  RefShapeWorkload
 
class  RefSliceWorkload
 
class  RefSoftmaxWorkload
 
class  RefSpaceToBatchNdWorkload
 
class  RefSpaceToDepthWorkload
 
class  RefSplitterWorkload
 
class  RefStackWorkload
 
class  RefStridedSliceWorkload
 
class  RefTensorHandle
 
class  RefTensorHandleFactory
 
class  RefTransposeConvolution2dWorkload
 
class  RefTransposeWorkload
 
class  RefUnidirectionalSequenceLstmWorkload
 
class  RefWorkloadFactory
 
struct  ReshapeDescriptor
 A ReshapeDescriptor for the ReshapeLayer. More...
 
class  ReshapeLayer
 This layer represents a reshape operation. More...
 
struct  ReshapeQueueDescriptor
 
struct  ResizeDescriptor
 A ResizeDescriptor for the ResizeLayer. More...
 
class  ResizeLayer
 This layer represents a resize operation. More...
 
struct  ResizeQueueDescriptor
 
struct  ResolveTypeImpl
 
struct  ResolveTypeImpl< DataType::BFloat16 >
 
struct  ResolveTypeImpl< DataType::Boolean >
 
struct  ResolveTypeImpl< DataType::Float16 >
 
struct  ResolveTypeImpl< DataType::Float32 >
 
struct  ResolveTypeImpl< DataType::QAsymmS8 >
 
struct  ResolveTypeImpl< DataType::QAsymmU8 >
 
struct  ResolveTypeImpl< DataType::QSymmS16 >
 
struct  ResolveTypeImpl< DataType::QSymmS8 >
 
struct  ResolveTypeImpl< DataType::Signed32 >
 
struct  ResolveTypeImpl< DataType::Signed64 >
 
struct  rsqrt
 
class  RsqrtLayer
 
struct  RsqrtQueueDescriptor
 
struct  Rule
 
class  RuntimeException
 
struct  RuntimeImpl
 
class  ScaledInt32Decoder
 
class  ScaledInt32PerAxisDecoder
 
class  ScopedProfilingEvent
 
struct  ScopedRecord
 
class  ScopedTensorHandle
 
class  ShapeLayer
 
struct  ShapeQueueDescriptor
 
struct  ShapesAreBroadcastCompatible
 
struct  ShapesAreSameRank
 
struct  ShapesAreSameTotalSize
 
class  SimpleLogger
 
struct  sin
 
class  SingleAxisPriorityList
 SingleAxisPriorityList sorts the MemBlocks according to some priority, then tries to place them into as few bins as possible. More...
 
struct  SliceDescriptor
 A SliceDescriptor for the SliceLayer. More...
 
class  SliceLayer
 
struct  SliceQueueDescriptor
 
struct  SoftmaxDescriptor
 A SoftmaxDescriptor for the SoftmaxLayer. More...
 
class  SoftmaxLayer
 This layer represents a softmax operation. More...
 
struct  SoftmaxQueueDescriptor
 
struct  SpaceToBatchNdDescriptor
 A SpaceToBatchNdDescriptor for the SpaceToBatchNdLayer. More...
 
class  SpaceToBatchNdLayer
 This layer represents a SpaceToBatchNd operation. More...
 
struct  SpaceToBatchNdQueueDescriptor
 
struct  SpaceToDepthDescriptor
 A SpaceToDepthDescriptor for the SpaceToDepthLayer. More...
 
class  SpaceToDepthLayer
 This layer represents a SpaceToDepth operation. More...
 
struct  SpaceToDepthQueueDescriptor
 
class  SplitterLayer
 This layer represents a split operation. More...
 
struct  SplitterQueueDescriptor
 
struct  sqrt
 
struct  StackDescriptor
 A StackDescriptor for the StackLayer. More...
 
class  StackLayer
 This layer represents a stack operation. More...
 
struct  StackQueueDescriptor
 
class  StandardOutputSink
 
struct  StandInDescriptor
 A StandInDescriptor for the StandIn layer. More...
 
class  StandInLayer
 This layer represents an unknown operation in the input graph. More...
 
class  StrategyBase
 Strategy base class with empty implementations. More...
 
struct  StrategyFactory
 
class  StrategyValidator
 
struct  StridedSliceDescriptor
 A StridedSliceDescriptor for the StridedSliceLayer. More...
 
class  StridedSliceLayer
 This layer represents a strided slice operation. More...
 
struct  StridedSliceQueueDescriptor
 
struct  StringifyLayerParameters
 StringifyLayerParameters allows serializing layer parameters to string. More...
 
struct  StringifyLayerParameters< ActivationDescriptor >
 
struct  StringifyLayerParameters< BatchNormalizationDescriptor >
 
struct  StringifyLayerParameters< BatchToSpaceNdDescriptor >
 
struct  StringifyLayerParameters< ChannelShuffleDescriptor >
 
struct  StringifyLayerParameters< ComparisonDescriptor >
 
struct  StringifyLayerParameters< Convolution2dDescriptor >
 
struct  StringifyLayerParameters< Convolution3dDescriptor >
 
struct  StringifyLayerParameters< DepthwiseConvolution2dDescriptor >
 
struct  StringifyLayerParameters< DetectionPostProcessDescriptor >
 
struct  StringifyLayerParameters< ElementwiseUnaryDescriptor >
 
struct  StringifyLayerParameters< FakeQuantizationDescriptor >
 
struct  StringifyLayerParameters< FullyConnectedDescriptor >
 
struct  StringifyLayerParameters< L2NormalizationDescriptor >
 
struct  StringifyLayerParameters< LstmDescriptor >
 
struct  StringifyLayerParameters< MeanDescriptor >
 
struct  StringifyLayerParameters< NormalizationDescriptor >
 
struct  StringifyLayerParameters< OriginsDescriptor >
 
struct  StringifyLayerParameters< PadDescriptor >
 
struct  StringifyLayerParameters< PermuteDescriptor >
 
struct  StringifyLayerParameters< Pooling2dDescriptor >
 
struct  StringifyLayerParameters< Pooling3dDescriptor >
 
struct  StringifyLayerParameters< PreCompiledDescriptor >
 
struct  StringifyLayerParameters< ReduceDescriptor >
 
struct  StringifyLayerParameters< ReshapeDescriptor >
 
struct  StringifyLayerParameters< ResizeDescriptor >
 
struct  StringifyLayerParameters< SoftmaxDescriptor >
 
struct  StringifyLayerParameters< SpaceToBatchNdDescriptor >
 
struct  StringifyLayerParameters< SpaceToDepthDescriptor >
 
struct  StringifyLayerParameters< StackDescriptor >
 
struct  StringifyLayerParameters< StridedSliceDescriptor >
 
struct  StringifyLayerParameters< TransposeConvolution2dDescriptor >
 
struct  StringifyLayerParameters< TransposeDescriptor >
 
struct  StringifyLayerParameters< ViewsDescriptor >
 
struct  StringMapping
 StringMapping is a helper class that allows strings to be used as template parameters, simplifying code which differs only in a string, such as a debug string literal. More...
 
class  SubgraphView
 The SubgraphView class represents a subgraph of a Graph. More...
 
class  SubgraphViewSelector
 Algorithm that splits a Graph into Subgraphs based on a filtering of layers (e.g. More...
 
class  SubtractionLayer
 This layer represents a subtraction operation. More...
 
struct  SubtractionQueueDescriptor
 
class  SwitchLayer
 This layer calculates both true and false outputs for input. More...
 
struct  SwitchQueueDescriptor
 
class  SyncMemGenericWorkload
 
class  Tensor
 A tensor defined by a TensorInfo (shape and data type) and a mutable backing store. More...
 
class  TensorBufferArrayView
 
class  TensorHandle
 
class  TensorHandleFactoryRegistry
 
class  TensorInfo
 
struct  TensorMemory
 
struct  TensorNumDimensionsAreCorrect
 
class  TensorShape
 
class  TestBatchNormalizationLayerVisitor
 
class  TestConstantLayerVisitor
 
class  TestConvolution2dLayerVisitor
 
class  TestDepthwiseConvolution2dLayerVisitor
 
class  TestFullyConnectedLayerVistor
 
class  TestInputLayerVisitor
 
class  TestLayerVisitor
 
class  TestLstmLayerVisitor
 
class  TestOutputLayerVisitor
 
class  TestQLstmLayerVisitor
 
class  TestQuantizedLstmLayerVisitor
 
class  TestStrategy
 
struct  ThrowingStrategy
 
class  TimeoutException
 
class  TransformIterator
 
struct  TransposeConvolution2dDescriptor
 A TransposeConvolution2dDescriptor for the TransposeConvolution2dLayer. More...
 
class  TransposeConvolution2dLayer
 This layer represents a 2D transpose convolution operation. More...
 
struct  TransposeConvolution2dQueueDescriptor
 
struct  TransposeDescriptor
 A TransposeDescriptor for the TransposeLayer. More...
 
class  TransposeLayer
 This layer represents a transpose operation. More...
 
struct  TransposeQueueDescriptor
 
struct  TypeAnyOf
 
class  TypedIterator
 
class  TypedWorkload
 
struct  TypeIs
 
struct  TypeNotPerAxisQuantized
 
struct  TypesAreEqual
 
class  UnidirectionalSequenceLstmLayer
 This layer represents a unidirectional sequence LSTM operation. More...
 
struct  UnidirectionalSequenceLstmQueueDescriptor
 
class  UnimplementedException
 
class  UnmapLayer
 This layer represents a memory unmap operation. More...
 
struct  UnmapQueueDescriptor
 
class  UnmapWorkload
 
struct  ViewsDescriptor
 A ViewsDescriptor for the SplitterLayer. More...
 
struct  VisitorNoThrowPolicy
 
struct  VisitorThrowingPolicy
 
class  WallClockTimer
 
class  WorkloadDataCollector
 
class  WorkloadFactoryBase
 
struct  WorkloadInfo
 Contains information about TensorInfos of a layer. More...
 

Typedefs

using BackendIdVector = std::vector< BackendId >
 
using BackendIdSet = std::unordered_set< BackendId >
 
using NetworkOptions = std::vector< BackendOptions >
 
using ModelOptions = std::vector< BackendOptions >
 
using BackendCapabilities = BackendOptions
 
using IBackendInternalUniquePtr = std::unique_ptr< IBackendInternal >
 
using MemoryOptimizerStrategiesMapRef = std::unordered_map< BackendId, std::shared_ptr< IMemoryOptimizerStrategy > >
 
using DynamicBackendPtr = std::unique_ptr< DynamicBackend >
 
using IBackendContextUniquePtr = std::unique_ptr< IBackendContext >
 
using ILayerSupportSharedPtr = std::shared_ptr< ILayerSupport >
 
using IMemoryManagerUniquePtr = std::unique_ptr< IMemoryManager >
 
using instead = ConstTensorHandle
 
template<typename QueueDescriptor >
using FloatWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Uint8Workload = TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 >
 
template<typename QueueDescriptor >
using Int32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Signed32 >
 
template<typename QueueDescriptor >
using BooleanWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BaseFloat32ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BaseUint8ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BFloat16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::BFloat16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32ToBFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::BFloat16 >
 
template<typename QueueDescriptor >
using Float16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32ToFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Float16 >
 
template<typename QueueDescriptor >
using Uint8ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Float32 >
 
using InputQueueDescriptor = MemCopyQueueDescriptor
 
using OutputQueueDescriptor = MemCopyQueueDescriptor
 
using MergerQueueDescriptor = ConcatQueueDescriptor
 
using LogSoftmaxDescriptor = SoftmaxDescriptor
 A LogSoftmaxDescriptor for the LogSoftmaxLayer. More...
 
using DepthToSpaceDescriptor = SpaceToDepthDescriptor
 A DepthToSpaceDescriptor for the DepthToSpaceLayer. More...
 
using UnidirectionalSequenceLstmDescriptor = LstmDescriptor
 
using ConcatDescriptor = OriginsDescriptor
 
using MergerDescriptor = OriginsDescriptor
 MergerDescriptor is deprecated, use ConcatDescriptor instead. More...
 
using SplitterDescriptor = ViewsDescriptor
 
using INetworkPtr = std::unique_ptr< INetwork, void(*)(INetwork *network)>
 
using IOptimizedNetworkPtr = std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)>
 
using CompiledBlobDeleter = std::function< void(const void *)>
 
using CompiledBlobPtr = std::unique_ptr< void, CompiledBlobDeleter >
 
using NetworkId = int
 
using IRuntimePtr = std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)>
 
using IGpuAccTunedParametersPtr = std::shared_ptr< IGpuAccTunedParameters >
 The following API is replaced by the backend options API. More...
 
using MemorySourceFlags = unsigned int
 
using BindingPointInfo = std::pair< armnn::LayerBindingId, armnn::TensorInfo >
 
using InputTensors = std::vector< std::pair< LayerBindingId, class ConstTensor > >
 
using OutputTensors = std::vector< std::pair< LayerBindingId, class Tensor > >
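As a rough sketch, input and output tensors are bound by LayerBindingId before enqueueing a workload (runtime, netId and the data buffers are assumed to exist; recent releases also expect the input TensorInfo to be marked constant):

    armnn::TensorInfo inputInfo = runtime->GetInputTensorInfo(netId, 0);
    inputInfo.SetConstant(true);
    armnn::InputTensors  inputTensors  { { 0, armnn::ConstTensor(inputInfo, inputData.data()) } };
    armnn::OutputTensors outputTensors { { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) } };
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);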
 
using IBackendSharedPtr = std::shared_ptr< IBackend >
 
using IBackendUniquePtr = std::unique_ptr< IBackend, void(*)(IBackend *backend)>
 
using LayerBindingId = int
 Type of identifiers for bindable layers (inputs, outputs). More...
 
using ImportedInputId = unsigned int
 
using ImportedOutputId = unsigned int
 
using DebugCallbackFunction = std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)>
 Define the type of callback for the Debug layer to call. More...
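A short sketch of registering such a callback on the runtime (this assumes IRuntime::RegisterDebugCallback and a network loaded with debug enabled):

    runtime->RegisterDebugCallback(netId,
        [](armnn::LayerGuid /*guid*/, unsigned int slotIndex, armnn::ITensorHandle* tensorHandle)
        {
            // Inspect the intermediate output here, e.g. its element count.
            std::cout << "Debug output, slot " << slotIndex << ": "
                      << tensorHandle->GetShape().GetNumElements() << " elements\n";
        });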
 
using HighResolutionClock = std::chrono::high_resolution_clock::time_point
 Define a timer and associated inference ID for recording execution times. More...
 
using InferenceTimingPair = std::pair< HighResolutionClock, HighResolutionClock >
 
using TensorInfos = std::vector< TensorInfo >
 
using WorkloadQueue = std::vector< std::unique_ptr< IWorkload > >
 
using Coordinates = std::array< unsigned int, MaxNumOfTensorDimensions >
 
using Dimensions = std::array< unsigned int, MaxNumOfTensorDimensions >
 
using LayerPriority = unsigned int
 
using AdditionalInfoObjectPtr = std::shared_ptr< void >
 
using PreCompiledObjectDeleter = std::function< void(const void *)>
 
using PreCompiledObjectPtr = std::unique_ptr< void, PreCompiledObjectDeleter >
 
template<LayerType Type>
using LayerTypeOf = typename LayerTypeOfImpl< Type >::Type
 
using NetworkImplPtr = std::unique_ptr< NetworkImpl, void(*)(NetworkImpl *network)>
 
using BackendsMap = std::map< BackendId, std::unique_ptr< class IBackendInternal > >
 
template<DataType DT>
using ResolveType = typename ResolveTypeImpl< DT >::Type
 
using LoadedNetworks = std::unordered_map< NetworkId, std::unique_ptr< LoadedNetwork > >
 
using IReportStructure = arm::pipe::IReportStructure
 
using IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService
 
using ParameterStringifyFunction = std::function< void(const std::string &name, const std::string &value)>
 
using FactoryId = ITensorHandleFactory::FactoryId
 
using Half = half_float::half
 
using CopyAndImportFactoryPairs = std::map< ITensorHandleFactory::FactoryId, ITensorHandleFactory::FactoryId >
 
using ACLMemManagerOnDemand = std::shared_ptr< arm_compute::MemoryManagerOnDemand >
 
using RefDebugBFloat16Workload = RefDebugWorkload< DataType::BFloat16 >
 
using RefDebugFloat16Workload = RefDebugWorkload< DataType::Float16 >
 
using RefDebugFloat32Workload = RefDebugWorkload< DataType::Float32 >
 
using RefDebugQAsymmU8Workload = RefDebugWorkload< DataType::QAsymmU8 >
 
using RefDebugQAsymmS8Workload = RefDebugWorkload< DataType::QAsymmS8 >
 
using RefDebugQSymmS16Workload = RefDebugWorkload< DataType::QSymmS16 >
 
using RefDebugQSymmS8Workload = RefDebugWorkload< DataType::QSymmS8 >
 
using RefDebugSigned32Workload = RefDebugWorkload< DataType::Signed32 >
 
template<typename DataType = float>
using RefAdditionWorkload = RefElementwiseWorkload< std::plus< DataType >, AdditionQueueDescriptor, StringMapping::RefAdditionWorkload_Execute >
 
template<typename DataType = float>
using RefSubtractionWorkload = RefElementwiseWorkload< std::minus< DataType >, SubtractionQueueDescriptor, StringMapping::RefSubtractionWorkload_Execute >
 
template<typename DataType = float>
using RefMultiplicationWorkload = RefElementwiseWorkload< std::multiplies< DataType >, MultiplicationQueueDescriptor, StringMapping::RefMultiplicationWorkload_Execute >
 
template<typename DataType = float>
using RefDivisionWorkload = RefElementwiseWorkload< std::divides< DataType >, DivisionQueueDescriptor, StringMapping::RefDivisionWorkload_Execute >
 
template<typename DataType = float>
using RefMaximumWorkload = RefElementwiseWorkload< armnn::maximum< DataType >, MaximumQueueDescriptor, StringMapping::RefMaximumWorkload_Execute >
 
template<typename DataType = float>
using RefMinimumWorkload = RefElementwiseWorkload< armnn::minimum< DataType >, MinimumQueueDescriptor, StringMapping::RefMinimumWorkload_Execute >
 
using RefPermuteBFloat16Workload = RefPermuteWorkload< DataType::BFloat16 >
 
using RefPermuteFloat16Workload = RefPermuteWorkload< DataType::Float16 >
 
using RefPermuteFloat32Workload = RefPermuteWorkload< DataType::Float32 >
 
using RefPermuteQAsymmS8Workload = RefPermuteWorkload< DataType::QAsymmS8 >
 
using RefPermuteQAsymm8Workload = RefPermuteWorkload< DataType::QAsymmU8 >
 
using RefPermuteQSymm16Workload = RefPermuteWorkload< DataType::QSymmS16 >
 
using RefTransposeBFloat16Workload = RefTransposeWorkload< DataType::BFloat16 >
 
using RefTransposeFloat16Workload = RefTransposeWorkload< DataType::Float16 >
 
using RefTransposeFloat32Workload = RefTransposeWorkload< DataType::Float32 >
 
using RefTransposeQAsymmS8Workload = RefTransposeWorkload< DataType::QAsymmS8 >
 
using RefTransposeQAsymm8Workload = RefTransposeWorkload< DataType::QAsymmU8 >
 
using RefTransposeQSymm16Workload = RefTransposeWorkload< DataType::QSymmS16 >
 

Enumerations

enum  Compute { Undefined = 0, CpuRef = 1, CpuAcc = 2, GpuAcc = 3 }
 The Compute enum is deprecated and is being replaced by BackendId. More...
 
enum  CapabilityClass { PaddingRequired = 1, FallbackImportDisabled = 2, CapabilityClassMax = 254 }
 Capability class used by the GetCapabilities function so that only capabilities within the given scope are chosen for calculation. More...
 
enum  EdgeStrategy { Undefined, DirectCompatibility, ExportToTarget, CopyToTarget }
 
enum  BoostLogSeverityMapping {
  trace, debug, info, warning,
  error, fatal
}
 
enum  Status { Success = 0, Failure = 1 }
 enumeration More...
 
enum  DataType {
  Float16 = 0, Float32 = 1, QAsymmU8 = 2, Signed32 = 3,
  Boolean = 4, QSymmS16 = 5, QSymmS8 = 6, QAsymmS8 = 7,
  BFloat16 = 8, Signed64 = 9
}
 
enum  DataLayout { NCHW = 1, NHWC = 2, NDHWC = 3, NCDHW = 4 }
 
enum  ProfilingDetailsMethod { Undefined = 0, DetailsWithEvents = 1, DetailsOnly = 2 }
 Define the behaviour of the internal profiler when outputting network details. More...
 
enum  QosExecPriority { Low = 0, Medium = 1, High = 2 }
 
enum  ActivationFunction {
  Sigmoid = 0, TanH = 1, Linear = 2, ReLu = 3,
  BoundedReLu = 4, SoftReLu = 5, LeakyReLu = 6, Abs = 7,
  Sqrt = 8, Square = 9, Elu = 10, HardSwish = 11
}
 
enum  ArgMinMaxFunction { Min = 0, Max = 1 }
 
enum  ComparisonOperation {
  Equal = 0, Greater = 1, GreaterOrEqual = 2, Less = 3,
  LessOrEqual = 4, NotEqual = 5
}
 
enum  LogicalBinaryOperation { LogicalAnd = 0, LogicalOr = 1 }
 
enum  UnaryOperation {
  Abs = 0, Exp = 1, Sqrt = 2, Rsqrt = 3,
  Neg = 4, LogicalNot = 5, Log = 6, Sin = 7
}
 
enum  PoolingAlgorithm { Max = 0, Average = 1, L2 = 2 }
 
enum  ReduceOperation {
  Sum = 0, Max = 1, Mean = 2, Min = 3,
  Prod = 4
}
 
enum  ResizeMethod { Bilinear = 0, NearestNeighbor = 1 }
 
enum  Dimensionality { NotSpecified = 0, Specified = 1, Scalar = 2 }
 
enum  PaddingMethod { IgnoreValue = 0, Exclude = 1 }
 The padding method modifies the output of pooling layers. More...
 
enum  PaddingMode { Constant = 0, Reflect = 1, Symmetric = 2 }
 The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect). More...
 
enum  NormalizationAlgorithmChannel { Across = 0, Within = 1 }
 
enum  NormalizationAlgorithmMethod { LocalBrightness = 0, LocalContrast = 1 }
 
enum  OutputShapeRounding { Floor = 0, Ceiling = 1 }
 
enum  ShapeInferenceMethod { ValidateOnly = 0, InferAndValidate = 1 }
 The ShapeInferenceMethod modifies how the output shapes are treated. More...
 
enum  MemorySource : uint32_t {
  Undefined = 0, Malloc = 1, DmaBuf = 2, DmaBufProtected = 4,
  Gralloc = 5
}
 Define the Memory Source to reduce copies. More...
 
enum  MemBlockStrategyType { SingleAxisPacking = 0, MultiAxisPacking = 1 }
 
enum  BackendCapability : uint32_t { NonConstWeights, AsyncExecution }
 BackendCapability class. More...
 
enum  LayerType {
  X, Activation, Addition, ArgMinMax,
  BatchNormalization, BatchToSpaceNd, Comparison, Concat,
  Constant, ConvertBf16ToFp32, ConvertFp16ToFp32, ConvertFp32ToBf16,
  ConvertFp32ToFp16, Convolution2d, Debug, DepthToSpace,
  DepthwiseConvolution2d, Dequantize, DetectionPostProcess, Division,
  ElementwiseUnary, FakeQuantization, Fill, Floor,
  FullyConnected, Gather, Input, InstanceNormalization,
  L2Normalization, LogicalBinary, LogSoftmax, Lstm,
  QLstm, Map, Maximum, Mean,
  MemCopy, MemImport, Merge, Minimum,
  Multiplication, Normalization, Output, Pad,
  Permute, Pooling2d, PreCompiled, Prelu,
  Quantize, QuantizedLstm, Reshape, Rank,
  Resize, Reduce, Slice, Softmax,
  SpaceToBatchNd, SpaceToDepth, Splitter, Stack,
  StandIn, StridedSlice, Subtraction, Switch,
  Transpose, TransposeConvolution2d, Unmap, Cast,
  Shape, UnidirectionalSequenceLstm, ChannelShuffle, Convolution3d,
  Pooling3d, GatherNd, FirstLayer = Activation, LastLayer = UnidirectionalSequenceLstm
}
 When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below. More...
 
enum  LogSeverity {
  Trace, Debug, Info, Warning,
  Error, Fatal
}
 
enum  GraphEvent { LayerAdded, LayerErased }
 
enum  JsonObjectType { Measurement, Event, ExecObjectDesc }
 
enum  TuningLevel { None, Rapid, Normal, Exhaustive }
 

Functions

LayerSupportHandle GetILayerSupportByBackendId (const armnn::BackendId &backend)
 Convenience function to retrieve the LayerSupportHandle for a backend. More...
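A brief sketch of querying layer support through the returned handle (inputInfo, outputInfo and activationDesc are assumed to exist; the exact IsXxxSupported overloads vary by release, so treat this as illustrative):

    armnn::LayerSupportHandle handle = armnn::GetILayerSupportByBackendId(armnn::BackendId("CpuAcc"));
    std::string reason;
    bool supported = handle.IsActivationSupported(inputInfo, outputInfo, activationDesc,
                                                  armnn::Optional<std::string&>(reason));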
 
bool HasCapability (const std::string &name, const BackendCapabilities &capabilities)
 Convenience function to check if a capability exists in a BackendCapabilities struct. More...
 
bool HasCapability (const std::string &name, const armnn::BackendId &backend)
 Convenience function to check if a capability exists in a backend. More...
 
bool HasCapability (const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
 Convenience function to check if a given capability matches a capability in a BackendCapabilities struct. More...
 
bool HasCapability (const BackendOptions::BackendOption &backendOption, const armnn::BackendId &backend)
 Convenience function to check if a given capability matches a capability in a backend. More...
 
Optional< const BackendOptions::BackendOptionGetCapability (const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
 Returns a BackendCapability if the backend lists the capability; the returned BackendCapability must then be inspected to check whether it is supported. Returns an EmptyOptional if the BackendCapability is unlisted. More...
 
Optional< const BackendOptions::BackendOptionGetCapability (const std::string &backendCapabilityName, const armnn::BackendId &backend)
 Returns a BackendCapability if the backend lists the capability; the returned BackendCapability must then be inspected to check whether it is supported. Returns an EmptyOptional if the BackendCapability is unlisted. More...
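For example, a capability can be checked roughly as follows (the capability and backend names are illustrative):

    if (armnn::HasCapability("NonConstWeights", armnn::BackendId("GpuAcc")))
    {
        auto capability = armnn::GetCapability("NonConstWeights", armnn::BackendId("GpuAcc"));
        if (capability.has_value() && capability.value().GetValue().AsBool())
        {
            // The backend reports support for non-constant weights.
        }
    }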
 
bool IsCapabilitySupported (const armnn::BackendId &backend, armnn::BackendCapability capability)
 Convenience function to check a capability on a backend. More...
 
unsigned int GetNumberOfCacheFiles (const armnn::BackendId &backend)
 Returns the number of cached files if backend supports caching. More...
 
constexpr char const * GetComputeDeviceAsCString (Compute compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const std::vector< Compute > &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const std::set< Compute > &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const Compute &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const BackendId &id)
 
template<template< typename... > class TContainer, typename... TContainerTemplateArgs>
std::ostream & operator<< (std::ostream &os, const TContainer< BackendId, TContainerTemplateArgs... > &ids)
 
template<typename F >
void ParseOptions (const std::vector< BackendOptions > &options, BackendId backend, F f)
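A rough sketch of iterating the options registered for one backend (networkOptions is assumed to be a std::vector<BackendOptions>; the option name is illustrative):

    armnn::ParseOptions(networkOptions, armnn::BackendId("GpuAcc"),
        [](const std::string& name, const armnn::BackendOptions::Var& value)
        {
            if (name == "TuningLevel")
            {
                int tuningLevel = armnn::ParseIntBackendOption(value, 0);
                // ... use tuningLevel
            }
        });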
 
bool ParseBooleanBackendOption (const armnn::BackendOptions::Var &value, bool defaultValue)
 
std::string ParseStringBackendOption (const armnn::BackendOptions::Var &value, std::string defaultValue)
 
int ParseIntBackendOption (const armnn::BackendOptions::Var &value, int defaultValue)
 
BackendRegistryBackendRegistryInstance ()
 
std::ostream & operator<< (std::ostream &os, const BackendVersion &backendVersion)
 
TensorShape GetUnpaddedTensorStrides (const TensorInfo &tensorInfo)
 
DataType GetBiasDataType (DataType inputDataType)
 
ARMNN_NO_DEPRECATE_WARN_BEGIN struct ARMNN_DEPRECATED_MSG_REMOVAL_DATE ("ResizeBilinearQueueDescriptor is deprecated use ResizeQueueDescriptor instead", "22.08") ResizeBilinearQueueDescriptor
 
template<typename TensorShapeIt >
OriginsDescriptor CreateDescriptorForConcatenation (TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension)
 Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors. More...
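A minimal sketch, concatenating two 4-D inputs along dimension 1 (the network pointer is assumed to exist):

    std::vector<armnn::TensorShape> inputShapes = { { 1, 3, 4, 4 }, { 1, 5, 4, 4 } };
    armnn::OriginsDescriptor concatDesc =
        armnn::CreateDescriptorForConcatenation(inputShapes.begin(), inputShapes.end(), 1);
    armnn::IConnectableLayer* concat = network->AddConcatLayer(concatDesc, "concat");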
 
template<typename ExceptionType >
void ConditionalThrow (bool condition, const std::string &message)
 
template<typename ExceptionType >
void ConditionalThrow (bool condition)
 
template<typename ExceptionType , typename ComparedType >
void ConditionalThrowIfNotEqual (const std::string &message, const ComparedType &leftHandSide, const ComparedType &rightHandSide)
 ComparedType must support: operator==(const ComparedType&) and operator<<(ostream&, const ComparedType&). More...
 
class ARMNN_DEPRECATED_MSG_REMOVAL_DATE ("Use ABI stable IStrategy instead.", "22.05") ILayerVisitor
 
IOptimizedNetworkPtr Optimize (const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
 Create an optimized version of the network. More...
 
IOptimizedNetworkPtr Optimize (const Graph &inGraph, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options, Optional< std::vector< std::string > &> messages=EmptyOptional())
 Create an optimized version of the network. More...
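A minimal sketch of the usual optimize-and-load flow (the network is assumed to have been built already):

    armnn::IRuntime::CreationOptions creationOptions;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(creationOptions);
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
        *network,
        { armnn::Compute::CpuAcc, armnn::Compute::CpuRef }, // backend preference order
        runtime->GetDeviceSpec());
    armnn::NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optimizedNet));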
 
bool IsActivationSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsAdditionSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsBatchNormalizationSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsBatchToSpaceNdSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConcatSupported (const BackendId &backend, const std::vector< const TensorInfo *> inputs, const TensorInfo &output, const OriginsDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConstantSupported (const BackendId &backend, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConvertFp16ToFp32Supported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConvertFp32ToFp16Supported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConvolution2dSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsDebugSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsDepthwiseConvolutionSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsDequantizeSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsDivisionSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsEqualSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsFakeQuantizationSupported (const BackendId &backend, const TensorInfo &input, const FakeQuantizationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsFloorSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsFullyConnectedSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const TensorInfo &biases, const FullyConnectedDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsGreaterSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsInputSupported (const BackendId &backend, const TensorInfo &input, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsL2NormalizationSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsLstmSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMaximumSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnSupported=nullptr, size_t reasonIfUnSupportedMaxLength=0)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMeanSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMemCopySupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMergeSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMinimumSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMultiplicationSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsNormalizationSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsOutputSupported (const BackendId &backend, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPadSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPermuteSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPreCompiledSupported (const BackendId &backend, const TensorInfo &input, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPreluSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPooling2dSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsQuantizedLstmSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &previousCellStateIn, const TensorInfo &previousOutputIn, const TensorInfo &cellStateOut, const TensorInfo &output, const QuantizedLstmInputParamsInfo &paramsInfo, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsReduceSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsReshapeSupported (const BackendId &backend, const TensorInfo &input, const ReshapeDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsResizeSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsRsqrtSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSoftmaxSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSpaceToBatchNdSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSpaceToDepthSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSplitterSupported (const BackendId &backend, const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, const ViewsDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsStackSupported (const BackendId &backend, const std::vector< const TensorInfo *> inputs, const TensorInfo &output, const StackDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsStridedSliceSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSubtractionSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSwitchSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output0, const TensorInfo &output1, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsTransposeConvolution2dSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
std::string LevelToString (LogSeverity level)
 
LogSeverity StringToLogLevel (std::string level)
 
void SetLogFilter (LogSeverity level)
 
void SetAllLoggingSinks (bool standardOut, bool debugOut, bool coloured)
 
constexpr LogSeverity ConvertLogSeverity (BoostLogSeverityMapping severity)
 
template<typename Arg , typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags Combine (Arg sourceA, Arg sourceB)
 
template<typename Arg , typename ... Args, typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags Combine (Arg source, Args... rest)
 
bool CheckFlag (MemorySourceFlags flags, MemorySource source)
 
template<typename T , class... Args>
Optional< T > MakeOptional (Args &&... args)
 Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object. More...
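 For example, constructing a TensorInfo in place (the shape and data type are illustrative):
 
     armnn::Optional<armnn::TensorInfo> biasInfo =
         armnn::MakeOptional<armnn::TensorInfo>(armnn::TensorShape({16}), armnn::DataType::Float32);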
 
const char * GetLayerTypeAsCString (LayerType type)
 
constexpr char const * GetStatusAsCString (Status status)
 
constexpr char const * GetActivationFunctionAsCString (ActivationFunction activation)
 
constexpr char const * GetArgMinMaxFunctionAsCString (ArgMinMaxFunction function)
 
constexpr char const * GetComparisonOperationAsCString (ComparisonOperation operation)
 
constexpr char const * GetUnaryOperationAsCString (UnaryOperation operation)
 
constexpr char const * GetLogicalBinaryOperationAsCString (LogicalBinaryOperation operation)
 
constexpr char const * GetPoolingAlgorithmAsCString (PoolingAlgorithm pooling)
 
constexpr char const * GetOutputShapeRoundingAsCString (OutputShapeRounding rounding)
 
constexpr char const * GetPaddingMethodAsCString (PaddingMethod method)
 
constexpr char const * GetPaddingModeAsCString (PaddingMode mode)
 
constexpr char const * GetReduceOperationAsCString (ReduceOperation reduce_operation)
 
constexpr unsigned int GetDataTypeSize (DataType dataType)
 
template<unsigned N>
constexpr bool StrEqual (const char *strA, const char(&strB)[N])
 
constexpr armnn::Compute ParseComputeDevice (const char *str)
 Deprecated function that will be removed together with the Compute enum. More...
 
constexpr const char * GetDataTypeName (DataType dataType)
 
constexpr const char * GetDataLayoutName (DataLayout dataLayout)
 
constexpr const char * GetNormalizationAlgorithmChannelAsCString (NormalizationAlgorithmChannel channel)
 
constexpr const char * GetNormalizationAlgorithmMethodAsCString (NormalizationAlgorithmMethod method)
 
constexpr const char * GetResizeMethodAsCString (ResizeMethod method)
 
constexpr const char * GetMemBlockStrategyTypeName (MemBlockStrategyType memBlockStrategyType)
 
template<typename T >
constexpr bool IsQuantizedType ()
 
constexpr bool IsQuantized8BitType (DataType dataType)
 
constexpr bool IsQuantizedType (DataType dataType)
 
std::ostream & operator<< (std::ostream &os, Status stat)
 
std::ostream & operator<< (std::ostream &os, const armnn::TensorShape &shape)
 
template<typename QuantizedType >
QuantizedType Quantize (float value, float scale, int32_t offset)
 Quantize a floating point data type into an 8-bit data type. More...
 
template<typename QuantizedType >
float Dequantize (QuantizedType value, float scale, int32_t offset)
 Dequantize an 8-bit data type into a floating point data type. More...
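 A round-trip sketch using the unsigned 8-bit quantized type (the scale and offset are illustrative):
 
     float scale = 0.05f;
     int32_t offset = 128;
     uint8_t quantized = armnn::Quantize<uint8_t>(1.25f, scale, offset);
     float restored = armnn::Dequantize<uint8_t>(quantized, scale, offset);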
 
void VerifyTensorInfoDataType (const armnn::TensorInfo &info, armnn::DataType dataType)
 
template<typename ... Ts>
void IgnoreUnused (Ts &&...)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_floating_point< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_floating_point< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Dest >::value &&std::is_integral< Dest >::value &&std::is_unsigned< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Dest >::value &&std::is_unsigned< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_signed< Source >::value &&std::is_integral< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_floating_point< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename DestType , typename SourceType >
DestType PolymorphicDowncast (SourceType *value)
 Polymorphic downcast for built-in pointers only. More...
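 A minimal sketch, assuming the layer is known to be an ActivationLayer (GetSomeLayer is a hypothetical helper):
 
     armnn::Layer* layer = GetSomeLayer();
     // The downcast is assert-checked in debug builds; the destination type must be a pointer type.
     auto* activation = armnn::PolymorphicDowncast<armnn::ActivationLayer*>(layer);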
 
template<typename DestType , typename SourceType >
auto PolymorphicPointerDowncast (const SourceType &value)
 Polymorphic downcast for shared pointers and built-in pointers. More...
 
std::chrono::high_resolution_clock::time_point GetTimeNow ()
 
std::chrono::duration< double, std::milli > GetTimeDuration (std::chrono::high_resolution_clock::time_point start_time)
 
template<typename Function , typename Iterator >
constexpr TransformIterator< Function, Iterator > MakeTransformIterator (Iterator i, Function f)
 
void ConfigureLogging (bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity)
 Configures the logging behaviour of the ARMNN library. More...
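 For example, logging Info level and above to standard output only:
 
     // printToStandardOutput = true, printToDebugOutput = false, severity threshold = Info.
     armnn::ConfigureLogging(true, false, armnn::LogSeverity::Info);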
 
bool NeonDetected ()
 
const std::string GetVersion ()
 
void swap (OriginsDescriptor &first, OriginsDescriptor &second)
 
void swap (ViewsDescriptor &first, ViewsDescriptor &second)
 
uint32_t GetNumInputs (bool biasEnabled)
 
void AssertNumberOfInputSlots (Layer &layer)
 
template<typename T >
constexpr LayerType LayerEnumOf (const T *=nullptr)
 
template<>
constexpr LayerType LayerEnumOf (const ActivationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const AdditionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ArgMinMaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchNormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchToSpaceNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const CastLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ChannelShuffleLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ComparisonLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConcatLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConstantLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertBf16ToFp32Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp16ToFp32Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp32ToBf16Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp32ToFp16Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const Convolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Convolution3dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DebugLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DepthToSpaceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DepthwiseConvolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DequantizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DetectionPostProcessLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DivisionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ElementwiseUnaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FakeQuantizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FillLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FloorLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FullyConnectedLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const GatherLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const GatherNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const InputLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const InstanceNormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const L2NormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LogicalBinaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LogSoftmaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MapLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MaximumLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MeanLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MemCopyLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MemImportLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MergeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MinimumLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MultiplicationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const NormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const OutputLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PadLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PermuteLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Pooling2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Pooling3dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PreCompiledLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PreluLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QuantizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QuantizedLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const RankLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReduceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReshapeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ResizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ShapeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SliceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SoftmaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SpaceToBatchNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SpaceToDepthLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SplitterLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StackLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StandInLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StridedSliceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SubtractionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SwitchLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TransposeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TransposeConvolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const UnidirectionalSequenceLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const UnmapLayer *)
 
template<typename T , typename V >
void SetValueChecked (Optional< T &> optionalRef, V &&val)
 
template<typename Float16Func , typename Float32Func , typename Uint8Func , typename Int32Func , typename BooleanFunc , typename ... Params>
bool IsSupportedForDataTypeGeneric (Optional< std::string &> reasonIfUnsupported, DataType dataType, Float16Func float16FuncPtr, Float32Func float32FuncPtr, Uint8Func uint8FuncPtr, Int32Func int32FuncPtr, BooleanFunc booleanFuncPtr, Params &&... params)
 
template<typename ... Params>
bool TrueFunc (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFunc (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncF16 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncF32 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncU8 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncI32 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseInputFuncF32 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseInputFuncF16 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseOutputFuncF32 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseOutputFuncF16 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
void CopyToOutputTensor (const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
 
const armnn::ConstTensor GetInputTensor (const LayerBindingId layerId, const InputTensors &inputTensors)
 
const armnn::Tensor GetOutputTensor (const LayerBindingId layerId, const OutputTensors &outputTensors)
 
template<LogSeverity Level>
void SetLoggingSinks (bool standardOut, bool debugOut, bool coloured)
 
void ReportError (const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
 
void ReportWarning (const std::string &warningMessage, Optional< std::vector< std::string > &> warningMessages)
 
OptimizationResult ReturnWithError (OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > &> errMessages)
 
bool CheckScaleSetOnQuantizedType (Layer *layer, Optional< std::vector< std::string > &> errMessages)
 
template<typename LayerT >
LayerT * ConvertBf16ToFp32Weight (Layer *l)
 
OptimizationResult AttemptBackendAssignment (BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > &> errMessages)
 
void AssignBackendsIConnectable (OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > &> errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, Graph::Iterator &firstLayer, Graph::Iterator &lastLayer, Optional< std::vector< std::string > &> errMessages)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView::IConnectableLayerIterator &firstLayer, SubgraphView::IConnectableLayerIterator &lastLayer, Optional< std::vector< std::string > &> errMessages)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > &> errMessages)
 
BackendsMap CreateSupportedBackends (TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)
 
OptimizationResult ApplyBackendOptimizations (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > &> errMessages)
 
bool RequiresCopy (ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)
 
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry)
 
ITensorHandleFactory::FactoryId CalculateSlotOption (BackendsMap &backends, OutputSlot &outputSlot, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
EdgeStrategy CalculateEdgeStrategy (BackendsMap &backends, ITensorHandleFactory::FactoryId srcFactoryId, const Layer &layer, const Layer &connectedLayer, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
OptimizationResult SelectTensorHandleStrategy (Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, Optional< std::vector< std::string > &> errMessages)
 
std::vector< ConvertBf16ToFp32Layer * > InsertConvertBf16ToFp32LayersBefore (Graph &graph, Layer &layer, bool expectCorrectInputType)
 
std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersBefore (Graph &graph, Layer &layer, bool expectCorrectInputType)
 
std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore (Graph &graph, Layer &layer, bool expectCorrectInputType)
 
std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersAfter (Graph &graph, Layer &layer)
 
std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter (Graph &graph, Layer &layer)
 
std::vector< DebugLayer * > InsertDebugLayerAfter (Graph &graph, Layer &layer)
 
template<typename T >
void Append (Optimizer::Optimizations &optimizations, T &&optimization)
 
template<typename Front , typename... Others>
void Append (Optimizer::Optimizations &optimizations, Front &&front, Others &&... others)
 
template<typename... Args>
Optimizer::Optimizations MakeOptimizations (Args &&... args)
 
Measurement FindMeasurement (const std::string &name, const Event *event)
 
std::vector< Measurement > FindKernelMeasurements (const Event *event)
 
const Event * GetEventPtr (const Event *ptr)
 
const Event * GetEventPtr (const std::unique_ptr< Event > &ptr)
 
int CalcLevel (const Event *eventPtr)
 
void ConfigureDetailsObject (JsonChildObject &detailsObject, std::string layerDetailsStr)
 
void ExtractJsonObjects (unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event *>> descendantsMap)
 
template<typename DescriptorType >
void ProfilingUpdateDescriptions (const std::string &name, const DescriptorType &desc, const WorkloadInfo &infos, const arm::pipe::ProfilingGuid guid)
 
template<typename Delegate >
void ForEachLayerInput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
 
template<typename Delegate >
void ForEachLayerOutput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
 
void AssignSplitId (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)
 
bool IsReadyForSplitAssignment (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)
 
 TEST_SUITE ("TestConstTensorLayerVisitor")
 
size_t GetProfilerEventSequenceSize (armnn::IProfiler *profiler)
 
void RuntimeLoadedNetworksReserve (armnn::RuntimeImpl *runtime)
 
 TEST_SUITE ("TestInputOutputLayerVisitor")
 
void CheckLayerBindingId (LayerBindingId visitorId, LayerBindingId id)
 
bool IsLayerSupported (const armnn::Layer *layer)
 
bool IsLayerSupported (const armnn::Layer &layer)
 
bool IsLayerOptimizable (const armnn::Layer *layer)
 
bool IsLayerOptimizable (const armnn::Layer &layer)
 
constexpr const char * MockTensorHandleFactoryId ()
 
Graph & GetGraphForTesting (IOptimizedNetwork *optNet)
 
ModelOptions & GetModelOptionsForTesting (IOptimizedNetwork *optNet)
 
arm::pipe::IProfilingService & GetProfilingService (armnn::RuntimeImpl *runtime)
 
std::ostream & operator<< (std::ostream &os, const BFloat16 &b)
 
void ReportUntouchedLayers (OptimizationViews &optimizationViews, std::map< LayerGuid, Layer *> untouched)
 
template<typename LayerType >
LayerType * FuseLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
 
template<typename LayerType >
LayerType * FuseAdditionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseSubtractionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseDivisionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseMultiplicationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseBatchNormalizationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseDepthwiseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseFullyConnectedLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
std::vector< IConnectableLayer * > ChainReduceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, ReduceDescriptor &desc)
 
template<typename LayerType >
void ReplaceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, std::vector< IConnectableLayer *> &layers)
 
arm_compute::NormalizationLayerInfo CreateAclNormalizationLayerInfoForL2Normalization (const armnn::TensorInfo &tensorInfo, armnn::DataLayout dataLayout)
 
arm_compute::ActivationLayerInfo::ActivationFunction ConvertActivationFunctionToAclActivationFunction (ActivationFunction armnnFunction)
 
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor &actDesc)
 
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor *activationDescPtr)
 
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo (const QueueDescriptor &queueDescriptor)
 
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo (uint32_t activationFunction)
 
arm_compute::ComparisonOperation ConvertComparisonOperationToAcl (const ComparisonDescriptor &descriptor)
 
arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType (PoolingAlgorithm poolingAlgorithm)
 
arm_compute::DimensionRoundingType ConvertOutputShapeRoundingToAclDimensionRoundingType (OutputShapeRounding rounding)
 
arm_compute::NormType ConvertNormalizationAlgorithmChannelToAclNormType (NormalizationAlgorithmChannel channelType)
 
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, const ActivationDescriptor *activationDesc)
 
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, arm_compute::ActivationLayerInfo activationLayerInfo)
 
arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy (ResizeMethod resizeMethod)
 
template<typename T >
T ComputeSoftmaxAclAxis (const SoftmaxDescriptor &softmaxDesc, const armnn::TensorInfo &tensor)
 
std::set< unsigned int > ComputeSplitAxis (const armnn::SplitterDescriptor &desc, const TensorShape &input)
 
int ComputeAclAxis (const int &armnnAxis, const armnn::TensorInfo &tensor)
 Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank) More...
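 A minimal sketch (the tensor shape is illustrative):
 
     armnn::TensorInfo info(armnn::TensorShape({1, 3, 224, 224}), armnn::DataType::Float32);
     // For a rank-4 tensor, ArmNN axis 1 (counted from the left) corresponds to ACL axis 2 (counted from the right).
     int aclAxis = armnn::ComputeAclAxis(1, info);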
 
unsigned int ComputePositiveAxis (const int &axis, const armnn::TensorInfo &tensor)
 Function to convert axis to its positive equivalent value. More...
 
arm_compute::Conv3dInfo ComputeConv3DInfo (const armnn::Convolution3dDescriptor descriptor, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 Utility function used to set up an arm_compute::Conv3dInfo object from a Convolution3d descriptor. More...
 
arm_compute::Conv3dInfo ComputeConv3DInfo (const armnn::Convolution3dQueueDescriptor queueDescriptor, bool isFastMathEnabled)
 
arm_compute::PaddingMode ConvertPaddingModeToAcl (const PaddingMode &paddingMode)
 
arm_compute::ReductionOperation ConvertReductionOperationToAcl (const ReduceDescriptor &descriptor)
 
const TensorInfo ComputeReductionTensorShape (const armnn::TensorInfo &input, const std::vector< uint32_t > &vAxis, const bool keepDims)
 Function to compute the output tensor shape based on the reduction axes and whether keepDims is set. More...
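 A minimal sketch (the input shape and axes are illustrative):
 
     armnn::TensorInfo input(armnn::TensorShape({2, 3, 4}), armnn::DataType::Float32);
     // Reduce over axis 1 with keepDims = false; the reduced dimension is expected to be dropped, giving {2, 4}.
     armnn::TensorInfo reduced =
         armnn::ComputeReductionTensorShape(input, std::vector<uint32_t>{1}, false);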
 
armnn::Optional< armnn::DataType > GetBiasTypeFromWeightsType (armnn::Optional< armnn::DataType > weightsType)
 
template<typename F >
bool CheckSupportRule (F rule, Optional< std::string &> reasonIfUnsupported, const char *reason)
 
template<typename T >
bool AllTypesAreEqualImpl (T)
 
template<typename T , typename... Rest>
bool AllTypesAreEqualImpl (T t1, T t2, Rest... rest)
 
std::unique_ptr< IMemoryOptimizerStrategy > GetMemoryOptimizerStrategy (const std::string &strategyName)
 
const std::vector< std::string > GetMemoryOptimizerStrategyNames ()
 
 TEST_SUITE ("MemoryManagerTests")
 
constexpr const char * MockImportBackendId ()
 
constexpr const char * MockBackendId ()
 
armnn::ConstTensor PermuteTensor (const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
 
void ReshapeWeightsForAcl (TensorInfo &weightInfo, DataLayout dataLayout)
 
template<typename DataType >
ConstTensor ReorderWeightChannelsForAcl (const ConstTensor &weightHandle, DataLayout dataLayout, void *permuteBuffer)
 
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl (const TensorInfo &weightInfo, DataLayout dataLayout)
 
std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout dataLayout, void *permuteBuffer)
 Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a ConstCpuTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. More...
 
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl (const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
 Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. Returns a tuple of the converted weights tensor info and the depth multiplier. More...
 
std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout &dataLayout, void *permuteBuffer)
 Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W]. More...
 
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl (const ConstTensorHandle *weightTensor, DataLayout dataLayout, void *permuteBuffer)
 
int32_t ConvertMaskToACLFormat (int32_t mask, int32_t numDim)
 
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices (TensorInfo inputInfo0, TensorInfo inputInfo1)
 Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1) More...
 
template<typename CopyFunc >
void CopyTensorContentsGeneric (const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
 
template<typename SrcTensorHandleType , typename DstTensorHandleType , typename DescriptorType >
void GatherTensorHandlePairs (const DescriptorType &descriptor, std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType *>> &tensorHandlePairs)
 
std::string LowerString (std::string value)
 
TuningLevel ParseTuningLevel (const BackendOptions::Var &value, TuningLevel defaultValue)
 
bool ParseBoolean (const BackendOptions::Var &value, bool defaultValue)
 
std::string ParseFile (const BackendOptions::Var &value, std::string defaultValue)
 
void ConfigureTuner (arm_compute::CLTuner &tuner, TuningLevel level)
 
constexpr const char * ClBackendId ()
 
flatbuffers::Offset< ClContext > CreateClContext (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>> programs=0)
 
flatbuffers::Offset< ClContext > CreateClContextDirect (flatbuffers::FlatBufferBuilder &_fbb, const std::vector< flatbuffers::Offset< armnn::Program >> *programs=nullptr)
 
flatbuffers::Offset< Program > CreateProgram (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::String > name=0, flatbuffers::Offset< flatbuffers::Vector< uint8_t >> binary=0)
 
flatbuffers::Offset< Program > CreateProgramDirect (flatbuffers::FlatBufferBuilder &_fbb, const char *name=nullptr, const std::vector< uint8_t > *binary=nullptr)
 
const armnn::ClContext * GetClContext (const void *buf)
 
const armnn::ClContext * GetSizePrefixedClContext (const void *buf)
 
const char * ClContextIdentifier ()
 
bool ClContextBufferHasIdentifier (const void *buf)
 
bool VerifyClContextBuffer (flatbuffers::Verifier &verifier)
 
bool VerifySizePrefixedClContextBuffer (flatbuffers::Verifier &verifier)
 
const char * ClContextExtension ()
 
void FinishClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)
 
void FinishSizePrefixedClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)
 
constexpr const char * ClImportTensorHandleFactoryId ()
 
constexpr const char * ClTensorHandleFactoryId ()
 
arm_compute::Status ClAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)
 
arm_compute::Status ClAdditionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)
 
arm_compute::Status ClBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)
 
arm_compute::Status ClCastValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)
 
arm_compute::Status ClComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)
 
arm_compute::Status ClConcatWorkloadValidate (const std::vector< const TensorInfo *> &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)
 
arm_compute::Status ClConstantWorkloadValidate (const TensorInfo &output)
 
arm_compute::Status ClConvertFp16ToFp32WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClConvertFp32ToFp16WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)
 
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClFloorWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)
 
arm_compute::Status ClGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)
 
arm_compute::Status ClInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)
 
arm_compute::Status ClL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)
 
arm_compute::Status ClLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)
 
arm_compute::Status ClLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClMeanValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)
 
arm_compute::Status ClMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)
 
arm_compute::Status ClPadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)
 
arm_compute::Status ClPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)
 
arm_compute::Status ClPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status ClPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)
 
arm_compute::Status ClPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)
 
arm_compute::Status ClQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &previousCellStateIn, const TensorInfo &previousOutputIn, const TensorInfo &cellStateOut, const TensorInfo &output, const QuantizedLstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
 
arm_compute::Status ClReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)
 
arm_compute::Status ClRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)
 
arm_compute::Status ClSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status ClSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)
 
arm_compute::Status ClSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)
 
arm_compute::Status ClSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)
 
arm_compute::Status ClSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClStackWorkloadValidate (const std::vector< const TensorInfo *> &inputs, const TensorInfo &output, const StackDescriptor &descriptor)
 
arm_compute::Status ClStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)
 
arm_compute::Status ClSubtractionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status ClTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)
 
arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &output, const Optional< TensorInfo > &hiddenStateOutput, const Optional< TensorInfo > &cellStateOutput, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
std::string GetConvolutionMethodString (arm_compute::ConvolutionMethod &convolutionMethod)
 
template<typename T >
void CopyArmComputeClTensorData (arm_compute::CLTensor &dstTensor, const T *srcData)
 
auto SetClStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
 
auto SetClSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
 
void InitializeArmComputeClTensorData (arm_compute::CLTensor &clTensor, const ConstTensorHandle *handle)
 
RuntimeException WrapClError (const cl::Error &clError, const CheckLocation &location)
 
void RunClFunction (arm_compute::IFunction &function, const CheckLocation &location)
 
template<typename DataType , typename PayloadType >
DataType * GetOutputTensorData (unsigned int idx, const PayloadType &data)
 
constexpr const char * NeonBackendId ()
 
constexpr const char * NeonTensorHandleFactoryId ()
 
arm_compute::Status NeonAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)
 
arm_compute::Status NeonAdditionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)
 
arm_compute::Status NeonBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)
 
arm_compute::Status NeonCastValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)
 
arm_compute::Status NeonComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)
 
arm_compute::Status NeonConcatWorkloadValidate (const std::vector< const TensorInfo *> &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)
 
arm_compute::Status NeonConstantWorkloadValidate (const TensorInfo &output)
 
arm_compute::Status NeonConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)
 
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::DetectionPostProcessLayerInfo MakeInfo (const DetectionPostProcessDescriptor &descriptor)
 
arm_compute::Status NeonDetectionPostProcessValidate (const TensorInfo &boxEncodings, const TensorInfo &scores, const TensorInfo &anchors, const TensorInfo &detectionBoxes, const TensorInfo &detectionClasses, const TensorInfo &detectionScores, const TensorInfo &numDetections, const DetectionPostProcessDescriptor &descriptor)
 
arm_compute::Status NeonDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)
 
arm_compute::Status NeonGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)
 
arm_compute::Status NeonInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)
 
arm_compute::Status NeonL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)
 
arm_compute::Status NeonLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)
 
arm_compute::Status NeonLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonMeanWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)
 
arm_compute::Status NeonMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 Validate function for validating the inputs and output. More...
 
arm_compute::Status NeonMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)
 
arm_compute::Status NeonPadWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)
 
arm_compute::Status NeonPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)
 
arm_compute::Status NeonPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status NeonPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)
 
arm_compute::Status NeonPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)
 
arm_compute::Status NeonQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const QuantizedLstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
 
arm_compute::Status NeonReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)
 
arm_compute::Status NeonRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)
 
arm_compute::Status NeonSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status NeonSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)
 
arm_compute::Status NeonSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)
 
arm_compute::Status NeonSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)
 
arm_compute::Status NeonSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonStackWorkloadValidate (const std::vector< const TensorInfo *> &inputs, const TensorInfo &output, const StackDescriptor &descriptor)
 
arm_compute::Status NeonStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)
 
arm_compute::Status NeonSubtractionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status NeonTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)
 
arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonUnidirectionalSequenceLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
template<typename T >
void CopyArmComputeTensorData (arm_compute::Tensor &dstTensor, const T *srcData)
 
void InitializeArmComputeTensorData (arm_compute::Tensor &tensor, const ConstTensorHandle *handle)
 
auto SetNeonStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
 
auto SetNeonSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
 
constexpr const char * RefBackendId ()
 
constexpr const char * RefTensorHandleFactoryId ()
 
template<DataType ArmnnType>
bool IsDataType (const WorkloadInfo &info)
 
bool IsSigned32 (const WorkloadInfo &info)
 
bool IsBFloat16 (const WorkloadInfo &info)
 
bool IsFloat16 (const WorkloadInfo &info)
 
bool IsQSymmS16 (const WorkloadInfo &info)
 
bool IsQSymmS8 (const WorkloadInfo &info)
 
bool IsQAsymmS8 (const WorkloadInfo &info)
 
bool IsQAsymmU8 (const WorkloadInfo &info)
 
template<typename QueueDescriptorType >
constexpr bool IsOperationQueueDescriptor (const QueueDescriptorType &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const MemCopyQueueDescriptor &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const ConstantQueueDescriptor &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const PermuteQueueDescriptor &)
 
float Activation (float in, ActivationFunction function, float a, float b)
 
void Activation (Decoder< float > &in, Encoder< float > &out, const TensorInfo &tensorInfo, ActivationFunction function, float a, float b)
 
template<typename OUT >
void ArgMinMax (Decoder< float > &in, OUT *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
template void ArgMinMax (Decoder< float > &in, int32_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
template void ArgMinMax (Decoder< float > &in, int64_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
void BatchNormImpl (const BatchNormalizationQueueDescriptor &data, Decoder< float > &meanDecoder, Decoder< float > &varianceDecoder, Decoder< float > &betaDecoder, Decoder< float > &gammaDecoder, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
unsigned int Offset (const TensorShape &shape, unsigned int batch, unsigned int height, unsigned int width, unsigned int channels, const DataLayoutIndexed &dataLayout)
 
void BatchToSpaceNd (const DataLayoutIndexed &dataLayout, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, const std::vector< unsigned int > &blockShape, const std::vector< std::pair< unsigned int, unsigned int >> &cropsData, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
void Concatenate (const ConcatQueueDescriptor &data, std::vector< ITensorHandle *> inputs, std::vector< ITensorHandle *> outputs)
 
void Convolve3d (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int paddingFront, unsigned int xStride, unsigned int yStride, unsigned int zStride, unsigned int xDilation, unsigned int yDilation, unsigned int zDilation)
 
void Convolve (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)
 
template<typename T >
void Debug (const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< BFloat16 > (const TensorInfo &inputInfo, const BFloat16 *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< Half > (const TensorInfo &inputInfo, const Half *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< float > (const TensorInfo &inputInfo, const float *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< uint8_t > (const TensorInfo &inputInfo, const uint8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< int8_t > (const TensorInfo &inputInfo, const int8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< int16_t > (const TensorInfo &inputInfo, const int16_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< int32_t > (const TensorInfo &inputInfo, const int32_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template<typename T >
std::unique_ptr< Decoder< T > > MakeDecoder (const TensorInfo &info, const void *data=nullptr)
 
template<>
std::unique_ptr< Decoder< float > > MakeDecoder (const TensorInfo &info, const void *data)
 
template<>
std::unique_ptr< Decoder< bool > > MakeDecoder (const TensorInfo &info, const void *data)
 
template<>
std::unique_ptr< Decoder< int32_t > > MakeDecoder (const TensorInfo &info, const void *data)
 
void DepthToSpace (const TensorInfo &inputInfo, const DepthToSpaceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void Dequantize (Decoder< float > &inputDecoder, Encoder< float > &outputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
 
std::vector< unsigned int > GenerateRangeK (unsigned int k)
 
void TopKSort (unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
 
float IntersectionOverUnion (const float *boxI, const float *boxJ)
 
std::vector< unsigned int > NonMaxSuppression (unsigned int numBoxes, const std::vector< float > &boxCorners, const std::vector< float > &scores, float nmsScoreThreshold, unsigned int maxDetection, float nmsIouThreshold)
 
void AllocateOutputData (unsigned int numOutput, unsigned int numSelected, const std::vector< float > &boxCorners, const std::vector< unsigned int > &outputIndices, const std::vector< unsigned int > &selectedBoxes, const std::vector< unsigned int > &selectedClasses, const std::vector< float > &selectedScores, float *detectionBoxes, float *detectionScores, float *detectionClasses, float *numDetections)
 
void DetectionPostProcess (const TensorInfo &boxEncodingsInfo, const TensorInfo &scoresInfo, const TensorInfo &anchorsInfo, const TensorInfo &detectionBoxesInfo, const TensorInfo &detectionClassesInfo, const TensorInfo &detectionScoresInfo, const TensorInfo &numDetectionsInfo, const DetectionPostProcessDescriptor &desc, Decoder< float > &boxEncodings, Decoder< float > &scores, Decoder< float > &anchors, float *detectionBoxes, float *detectionClasses, float *detectionScores, float *numDetections)
 
template<typename T >
std::unique_ptr< Encoder< T > > MakeEncoder (const TensorInfo &info, void *data=nullptr)
 
template<>
std::unique_ptr< Encoder< float > > MakeEncoder (const TensorInfo &info, void *data)
 
template<>
std::unique_ptr< Encoder< bool > > MakeEncoder (const TensorInfo &info, void *data)
 
template<>
std::unique_ptr< Encoder< int32_t > > MakeEncoder (const TensorInfo &info, void *data)
 
void Fill (Encoder< float > &output, const TensorShape &desiredOutputShape, const float value)
 Creates a tensor and fills it with a scalar value. More...
 
void FullyConnected (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *rBiasDecoder, bool biasEnabled, unsigned int K, bool transposeWeights)
 Performs a matrix multiplication and optionally adds a bias. More...
 
void Gather (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo, Decoder< float > &params, const int32_t *indices, Encoder< float > &output, const int32_t axis)
 
void InstanceNorm (const InstanceNormalizationQueueDescriptor &data, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
void LogSoftmax (Decoder< float > &input, Encoder< float > &output, const TensorInfo &inputInfo, const LogSoftmaxDescriptor &descriptor)
 
void LstmImpl (const LstmDescriptor &descriptor, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const TensorShape &inputToOutputWeightsShape, const TensorShape &recurrentToOutputWeightsShape, std::unique_ptr< Decoder< float >> &inputData, std::unique_ptr< Decoder< float >> &outputStateIn, std::unique_ptr< Decoder< float >> &cellStateIn, std::unique_ptr< Encoder< float >> &outputStateOut, std::unique_ptr< Encoder< float >> &cellStateOut, std::unique_ptr< Encoder< float >> &output, std::unique_ptr< Decoder< float >> &cellStateOutDecoder, std::unique_ptr< Decoder< float >> &outputDecoder, std::unique_ptr< Decoder< float >> &inputToInputWeightsTensor, std::unique_ptr< Decoder< float >> &inputToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &inputToCellWeightsTensor, std::unique_ptr< Decoder< float >> &inputToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToInputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToCellWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToInputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &cellToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &inputGateBiasTensor, std::unique_ptr< Decoder< float >> &forgetGateBiasTensor, std::unique_ptr< Decoder< float >> &cellBiasTensor, std::unique_ptr< Decoder< float >> &outputGateBiasTensor, std::unique_ptr< Decoder< float >> &projectionWeightsTensor, std::unique_ptr< Decoder< float >> &projectionBiasTensor, std::unique_ptr< Decoder< float >> &inputLayerNormWeights, std::unique_ptr< Decoder< float >> &forgetLayerNormWeights, std::unique_ptr< Decoder< float >> &cellLayerNormWeights, std::unique_ptr< Decoder< float >> &outputLayerNormWeights, std::unique_ptr< Encoder< float >> &inputGateScratch, std::unique_ptr< Encoder< float >> &cellScratch, std::unique_ptr< Encoder< float >> &forgetGateScratch, std::unique_ptr< Encoder< float >> &outputGateScratch, std::unique_ptr< Decoder< float >> &inputGateScratchDecoder, std::unique_ptr< Decoder< float >> &cellScratchDecoder, std::unique_ptr< Decoder< float >> &forgetGateScratchDecoder, std::unique_ptr< Decoder< float >> &outputGateScratchDecoder, float layerNormEpsilon)
 
void MirrorPad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
 
void Pad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
 
void Pooling2d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling2dDescriptor &params)
 Computes the Pooling2d operation. More...
 
void Pooling3d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling3dDescriptor &params)
 Computes the Pooling3d operation. More...
 
void PreluImpl (const TensorInfo &inputInfo, const TensorInfo &alphaInfo, const TensorInfo &outputInfo, Decoder< float > &inputData, Decoder< float > &alphaData, Encoder< float > &outputData)
 
bool NextIndex (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &current)
 
unsigned int ReducedOutputOffset (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &index, const unsigned int numAxis, const std::vector< unsigned int > &axis)
 
void Reduce (const TensorInfo &inputInfo, const TensorInfo &outputInfo, Decoder< float > &input, Encoder< float > &output, const std::vector< uint32_t > axis, const ReduceOperation reduceOperation)
 
void FakeQuantization (const float *inputData, float *outputData, uint32_t numElements, float min, float max)
 
unsigned int GetNumActivations (const TensorInfo &inputInfo)
 
const TensorInfo & GetTensorInfo (const ITensorHandle *tensorHandle)
 float32 helpers More...
 
template<typename DataType , typename PayloadType >
const DataType * GetInputTensorData (unsigned int idx, const PayloadType &data)
 
template<typename DataType >
DataType * GetOutputTensorData (ITensorHandle *tensorHandle)
 
template<typename PayloadType >
const float * GetInputTensorDataFloat (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
float * GetOutputTensorDataFloat (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
const Half * GetInputTensorDataHalf (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
Half * GetOutputTensorDataHalf (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
const BFloat16 * GetInputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
BFloat16 * GetOutputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)
 
template<typename T >
std::vector< float > Dequantize (const T *quant, const TensorInfo &info)
 u8 helpers More...
 
template<typename T >
void Dequantize (const T *inputData, float *outputData, const TensorInfo &info)
 
void Quantize (uint8_t *quant, const float *dequant, const TensorInfo &info)
 
void Resize (Decoder< float > &in, const TensorInfo &inputInfo, Encoder< float > &out, const TensorInfo &outputInfo, DataLayoutIndexed dataLayout, armnn::ResizeMethod resizeMethod, bool alignCorners, bool halfPixelCenters)
 
void Slice (const TensorInfo &inputInfo, const SliceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void Softmax (Decoder< float > &in, Encoder< float > &out, const TensorInfo &inputTensorInfo, float beta, int axis)
 Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. More...
 
unsigned int GetOffset (const TensorShape &shape, unsigned int b, unsigned int h, unsigned int w, unsigned int c, const DataLayoutIndexed &dataLayout)
 
void SpaceToBatchNd (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToBatchNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void SpaceToDepth (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToDepthDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void Split (const SplitterQueueDescriptor &data, std::vector< ITensorHandle *> inputs, std::vector< ITensorHandle *> outputs)
 
template<typename DataType >
void Splitter (const SplitterQueueDescriptor &data, std::vector< ITensorHandle *> inputs, std::vector< ITensorHandle *> outputs)
 
void Stack (const StackQueueDescriptor &data, std::vector< std::unique_ptr< Decoder< float >>> &inputs, Encoder< float > &output, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
 
void StridedSlice (const TensorInfo &inputInfo, const StridedSliceDescriptor &params, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void TransposeConvolution2dImpl (const TransposeConvolution2dDescriptor &descriptor, const TensorShape &inputShape, Decoder< float > &inputDecoder, const TensorShape &outputShape, Encoder< float > &outputEncoder, const TensorShape &weightsShape, Decoder< float > &weightsDecoder, Decoder< float > *biasesDecoder)
 
std::istream & operator>> (std::istream &in, armnn::Compute &compute)
 
std::istream & operator>> (std::istream &in, armnn::BackendId &backend)
 

Variables

constexpr unsigned int MaxNumOfTensorDimensions = 5U
 
constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u
 The lowest performance data capture interval we support is 10 milliseconds. More...
 
constexpr unsigned int EXPIRE_RATE = 3U
 Variable to control expire rate of priority queue. More...
 
constexpr std::size_t g_ProfilingEventCountHint = 1024
 
constexpr bool g_WriteProfilingEventSequence = true
 
constexpr bool g_AggregateProfilingEventsByInference = true
 
constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false
 
thread_local IProfiler * tl_Profiler = nullptr
 
constexpr size_t wordSize = sizeof(size_t) * 8
 
const BackendCapabilities gpuAccCapabilities ("GpuAcc", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", true}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })
 
const BackendCapabilities cpuAccCapabilities ("CpuAcc", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })
 
const std::set< armnn::LayerType > paddingRequiredLayers
 
const BackendCapabilities cpuRefCapabilities ("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })
 
const std::set< armnn::BackendCapability > oldCpuRefCapabilities
 

Detailed Description

Copyright (c) 2021 ARM Limited and Contributors.

Optional is a drop-in replacement for std::optional until we migrate to C++17.

All rights reserved.

SPDX-License-Identifier: MIT

Only a subset of the optional features are implemented that we intend to use in ArmNN. There are two distinct implementations here:

  1. for normal constructable/destructable types and reference types
  2. for reference types

The std::optional features we support are:

  • has_value() and operator bool() to tell if the optional has a value
  • value() returns a reference to the held object
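
A minimal sketch of that subset in use (the helper ScaleOr is hypothetical; only has_value()/value() from the list above are assumed):

#include <armnn/Optional.hpp>

// Returns the held value when one is present, otherwise a caller-supplied fallback.
float ScaleOr(const armnn::Optional<float>& scale, float fallback)
{
    return scale.has_value() ? scale.value() : fallback;
}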

Typedef Documentation

◆ ACLMemManagerOnDemand

using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>

Definition at line 22 of file NeonFullyConnectedWorkload.cpp.

◆ AdditionalInfoObjectPtr

using AdditionalInfoObjectPtr = std::shared_ptr<void>

Definition at line 213 of file Layer.hpp.

◆ BackendCapabilities

using BackendCapabilities = BackendOptions

Definition at line 19 of file BackendOptions.hpp.

◆ BackendIdSet

using BackendIdSet = std::unordered_set<BackendId>

Definition at line 193 of file BackendId.hpp.

◆ BackendIdVector

using BackendIdVector = std::vector<BackendId>

Definition at line 192 of file BackendId.hpp.

◆ BackendsMap

using BackendsMap = std::map<BackendId, std::unique_ptr<class IBackendInternal> >

Definition at line 294 of file Network.hpp.

◆ BaseFloat32ComparisonWorkload

◆ BaseUint8ComparisonWorkload

◆ BFloat16ToFloat32Workload

◆ BindingPointInfo

Definition at line 274 of file Tensor.hpp.

◆ BooleanWorkload

◆ CompiledBlobDeleter

typedef std::function< void(const void *)> CompiledBlobDeleter

Definition at line 244 of file INetwork.hpp.

◆ CompiledBlobPtr

typedef std::unique_ptr< void, CompiledBlobDeleter > CompiledBlobPtr

Definition at line 245 of file INetwork.hpp.

◆ ConcatDescriptor

Definition at line 55 of file DescriptorsFwd.hpp.

◆ Coordinates

using Coordinates = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 15 of file InternalTypes.hpp.

◆ CopyAndImportFactoryPairs

◆ DebugCallbackFunction

using DebugCallbackFunction = std::function<void(LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)>

Define the type of callback for the Debug layer to call.

Parameters
guid - guid of layer connected to the input of the Debug layer
slotIndex - index of the output slot connected to the input of the Debug layer
tensorHandle - TensorHandle for the input tensor to the Debug layer

Definition at line 379 of file Types.hpp.
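
A minimal sketch of a callback matching this type (MakeLoggingCallback is a hypothetical helper; only the signature documented above is assumed):

#include <armnn/Types.hpp>
#include <iostream>

// Builds a DebugCallbackFunction that logs which output slot the Debug layer observed.
armnn::DebugCallbackFunction MakeLoggingCallback()
{
    return [](armnn::LayerGuid /*guid*/, unsigned int slotIndex, armnn::ITensorHandle* /*tensorHandle*/)
    {
        std::cout << "Debug layer fired for output slot " << slotIndex << std::endl;
    };
}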

◆ DepthToSpaceDescriptor

A DepthToSpaceDescriptor for the DepthToSpaceLayer.

Definition at line 1080 of file Descriptors.hpp.

◆ Dimensions

using Dimensions = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 16 of file InternalTypes.hpp.

◆ DynamicBackendPtr

using DynamicBackendPtr = std::unique_ptr<DynamicBackend>

Definition at line 52 of file DynamicBackend.hpp.

◆ FactoryId

◆ Float16ToFloat32Workload

◆ Float32ToBFloat16Workload

◆ Float32ToFloat16Workload

◆ Float32Workload

◆ FloatWorkload

◆ Half

using Half = half_float::half

Definition at line 18 of file Half.hpp.

◆ HighResolutionClock

using HighResolutionClock = std::chrono::high_resolution_clock::time_point

Define a timer and associated inference ID for recording execution times.

Definition at line 382 of file Types.hpp.

◆ IBackendContextUniquePtr

using IBackendContextUniquePtr = std::unique_ptr<IBackendContext>

Definition at line 34 of file IBackendContext.hpp.

◆ IBackendInternalUniquePtr

typedef std::unique_ptr< IBackendInternal > IBackendInternalUniquePtr

Definition at line 32 of file BackendRegistry.hpp.

◆ IBackendSharedPtr

using IBackendSharedPtr = std::shared_ptr<IBackend>

Definition at line 263 of file Types.hpp.

◆ IBackendUniquePtr

using IBackendUniquePtr = std::unique_ptr<IBackend, void(*)(IBackend* backend)>

Definition at line 264 of file Types.hpp.

◆ IGpuAccTunedParametersPtr

The following API is replaced by the backend options API.

Definition at line 295 of file IRuntime.hpp.

◆ IInitialiseProfilingService

using IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService

Definition at line 28 of file Runtime.hpp.

◆ ILayerSupportSharedPtr

using ILayerSupportSharedPtr = std::shared_ptr<ILayerSupport>

Definition at line 572 of file ILayerSupport.hpp.

◆ IMemoryManagerUniquePtr

using IMemoryManagerUniquePtr = std::unique_ptr<IMemoryManager>

Definition at line 24 of file IMemoryManager.hpp.

◆ ImportedInputId

using ImportedInputId = unsigned int

Definition at line 291 of file Types.hpp.

◆ ImportedOutputId

using ImportedOutputId = unsigned int

Definition at line 292 of file Types.hpp.

◆ INetworkPtr

using INetworkPtr = std::unique_ptr<INetwork, void(*)(INetwork* network)>

Definition at line 241 of file INetwork.hpp.

◆ InferenceTimingPair

Definition at line 383 of file Types.hpp.

◆ InputQueueDescriptor

Definition at line 91 of file WorkloadData.hpp.

◆ InputTensors

using InputTensors = std::vector<std::pair<LayerBindingId, class ConstTensor> >

Definition at line 392 of file Tensor.hpp.

◆ instead

Definition at line 255 of file TensorHandle.hpp.

◆ Int32Workload

◆ IOptimizedNetworkPtr

using IOptimizedNetworkPtr = std::unique_ptr<IOptimizedNetwork, void(*)(IOptimizedNetwork* network)>

Definition at line 242 of file INetwork.hpp.

◆ IReportStructure

using IReportStructure = arm::pipe::IReportStructure

Definition at line 27 of file Runtime.hpp.

◆ IRuntimePtr

using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>

Definition at line 33 of file IRuntime.hpp.

◆ LayerBindingId

using LayerBindingId = int

Type of identifiers for bindable layers (inputs, outputs).

Definition at line 290 of file Types.hpp.

◆ LayerPriority

using LayerPriority = unsigned int

Definition at line 212 of file Layer.hpp.

◆ LayerTypeOf

using LayerTypeOf = typename LayerTypeOfImpl<Type>::Type

Definition at line 90 of file LayersFwd.hpp.

◆ LoadedNetworks

using LoadedNetworks = std::unordered_map<NetworkId, std::unique_ptr<LoadedNetwork> >

Definition at line 26 of file Runtime.hpp.

◆ LogSoftmaxDescriptor

A LogSoftmaxDescriptor for the LogSoftmaxLayer.

Definition at line 169 of file Descriptors.hpp.

◆ MemoryOptimizerStrategiesMapRef

using MemoryOptimizerStrategiesMapRef = std::unordered_map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy> >

Definition at line 33 of file BackendRegistry.hpp.

◆ MemorySourceFlags

using MemorySourceFlags = unsigned int

Definition at line 15 of file MemorySources.hpp.

◆ MergerDescriptor

MergerDescriptor is deprecated, use ConcatDescriptor instead.

Definition at line 59 of file DescriptorsFwd.hpp.

◆ MergerQueueDescriptor

Definition at line 149 of file WorkloadData.hpp.

◆ ModelOptions

using ModelOptions = std::vector<BackendOptions>

Definition at line 18 of file BackendOptions.hpp.

◆ NetworkId

typedef int NetworkId

Definition at line 27 of file IRuntime.hpp.

◆ NetworkImplPtr

using NetworkImplPtr = std::unique_ptr<NetworkImpl, void (*)(NetworkImpl* network)>

Definition at line 28 of file Network.hpp.

◆ NetworkOptions

using NetworkOptions = std::vector<BackendOptions>

Definition at line 16 of file BackendOptions.hpp.

◆ OutputQueueDescriptor

Definition at line 92 of file WorkloadData.hpp.

◆ OutputTensors

using OutputTensors = std::vector<std::pair<LayerBindingId, class Tensor> >

Definition at line 393 of file Tensor.hpp.

◆ ParameterStringifyFunction

using ParameterStringifyFunction = std::function<void(const std::string& name, const std::string& value)>

Definition at line 14 of file SerializeLayerParameters.hpp.

◆ PreCompiledObjectDeleter

using PreCompiledObjectDeleter = std::function<void(const void*)>

Definition at line 19 of file PreCompiledLayer.hpp.

◆ PreCompiledObjectPtr

using PreCompiledObjectPtr = std::unique_ptr<void, PreCompiledObjectDeleter>

Definition at line 20 of file PreCompiledLayer.hpp.

◆ RefAdditionWorkload

◆ RefDebugBFloat16Workload

◆ RefDebugFloat16Workload

◆ RefDebugFloat32Workload

◆ RefDebugQAsymmS8Workload

◆ RefDebugQAsymmU8Workload

◆ RefDebugQSymmS16Workload

◆ RefDebugQSymmS8Workload

◆ RefDebugSigned32Workload

◆ RefDivisionWorkload

◆ RefMaximumWorkload

◆ RefMinimumWorkload

◆ RefMultiplicationWorkload

◆ RefPermuteBFloat16Workload

◆ RefPermuteFloat16Workload

◆ RefPermuteFloat32Workload

◆ RefPermuteQAsymm8Workload

◆ RefPermuteQAsymmS8Workload

◆ RefPermuteQSymm16Workload

◆ RefSubtractionWorkload

◆ RefTransposeBFloat16Workload

◆ RefTransposeFloat16Workload

◆ RefTransposeFloat32Workload

◆ RefTransposeQAsymm8Workload

◆ RefTransposeQAsymmS8Workload

◆ RefTransposeQSymm16Workload

◆ ResolveType

using ResolveType = typename ResolveTypeImpl<DT>::Type

Definition at line 79 of file ResolveType.hpp.

◆ SplitterDescriptor

Definition at line 60 of file DescriptorsFwd.hpp.

◆ TensorInfos

using TensorInfos = std::vector<TensorInfo>

Definition at line 151 of file BackendHelper.cpp.

◆ Uint8ToFloat32Workload

◆ Uint8Workload

◆ UnidirectionalSequenceLstmDescriptor

◆ WorkloadQueue

using WorkloadQueue = std::vector< std::unique_ptr<IWorkload> >

Definition at line 13 of file ExecutionFrame.hpp.

Enumeration Type Documentation

◆ ActivationFunction

enum ActivationFunction
strong
Enumerator
Sigmoid 
TanH 
Linear 
ReLu 
BoundedReLu 

min(a, max(b, input)); covers ReLu1 and ReLu6 (see the sketch after this enumeration).

SoftReLu 
LeakyReLu 
Abs 
Sqrt 
Square 
Elu 
HardSwish 

Definition at line 86 of file Types.hpp.
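
For example, ReLu6 can be expressed through BoundedReLu by filling an ActivationDescriptor (a sketch; the variable name relu6 is illustrative):

#include <armnn/Descriptors.hpp>

// output = min(6, max(0, input))
armnn::ActivationDescriptor relu6;
relu6.m_Function = armnn::ActivationFunction::BoundedReLu;
relu6.m_A = 6.0f;   // upper bound a
relu6.m_B = 0.0f;   // lower bound b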

◆ ArgMinMaxFunction

enum ArgMinMaxFunction
strong
Enumerator
Min 
Max 

Definition at line 102 of file Types.hpp.

◆ BackendCapability

enum BackendCapability : uint32_t
strong

BackendCapability class.

Enumerator
NonConstWeights 

Constant weights can be accessed through the descriptors; non-const weights, on the other hand, can be accessed through inputs.

AsyncExecution 

Asynchronous Execution.

Definition at line 267 of file Types.hpp.

enum class BackendCapability : uint32_t
{
    /// Constant weights can be accessed through the descriptors,
    /// On the other hand, non-const weights can be accessed through inputs.
    NonConstWeights,

    /// Asynchronous Execution.
    AsyncExecution,

    // add new enum values here
};

◆ BoostLogSeverityMapping

◆ CapabilityClass

enum CapabilityClass
strong

Capability class used by the GetCapabilities function so that only the capabilities within the given scope are chosen for calculation.

Enumerator
PaddingRequired 
FallbackImportDisabled 
CapabilityClassMax 

Definition at line 20 of file ITensorHandleFactory.hpp.

◆ ComparisonOperation

enum ComparisonOperation
strong
Enumerator
Equal 
Greater 
GreaterOrEqual 
Less 
LessOrEqual 
NotEqual 

Definition at line 108 of file Types.hpp.

◆ Compute

enum Compute
strong

The Compute enum is deprecated and is being replaced by BackendId (see the sketch at the end of this section).

Enumerator
Undefined 
CpuRef 

CPU Execution: Reference C++ kernels.

CpuAcc 

CPU Execution: NEON: ArmCompute.

GpuAcc 

GPU Execution: OpenCL: ArmCompute.

Definition at line 21 of file BackendId.hpp.

enum class Compute
{
    Undefined = 0,
    /// CPU Execution: Reference C++ kernels
    CpuRef = 1,
    /// CPU Execution: NEON: ArmCompute
    CpuAcc = 2,
    /// GPU Execution: OpenCL: ArmCompute
    GpuAcc = 3
};
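
Since Compute is deprecated, backends are normally named through BackendId instead; a small sketch (the implicit conversion from Compute is assumed to remain available for legacy code):

#include <armnn/BackendId.hpp>

armnn::BackendId cpuAcc("CpuAcc");                  // preferred: construct from the backend name
armnn::BackendId gpuAcc = armnn::Compute::GpuAcc;   // legacy: conversion from the deprecated enum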

◆ DataLayout

enum DataLayout
strong
Enumerator
NCHW 
NHWC 
NDHWC 
NCDHW 

Definition at line 62 of file Types.hpp.

◆ DataType

enum DataType
strong
Enumerator
Float16 
Float32 
QAsymmU8 
Signed32 
Boolean 
QSymmS16 
QSymmS8 
QAsymmS8 
BFloat16 
Signed64 

Definition at line 48 of file Types.hpp.

◆ Dimensionality

enum Dimensionality
strong
Enumerator
NotSpecified 
Specified 
Scalar 

Definition at line 158 of file Types.hpp.

◆ EdgeStrategy

enum EdgeStrategy
strong
Enumerator
Undefined 

No strategy has been defined. Used internally to verify integrity of optimizations.

DirectCompatibility 

Destination backend can work directly with tensors on source backend.

ExportToTarget 

Source backends tensor data can be exported to destination backend tensor without copy.

CopyToTarget 

Copy contents from source backend tensor to destination backend tensor.

Definition at line 100 of file ITensorHandleFactory.hpp.

enum class EdgeStrategy
{
    Undefined,             /// No strategy has been defined. Used internally to verify integrity of optimizations.
    DirectCompatibility,   /// Destination backend can work directly with tensors on source backend.
    ExportToTarget,        /// Source backends tensor data can be exported to destination backend tensor without copy.
    CopyToTarget           /// Copy contents from source backend tensor to destination backend tensor.
};

◆ GraphEvent

enum GraphEvent
strong
Enumerator
LayerAdded 
LayerErased 

Definition at line 12 of file IGraphObservable.hpp.

◆ JsonObjectType

enum JsonObjectType
strong
Enumerator
Measurement 
Event 
ExecObjectDesc 

Definition at line 20 of file JsonPrinter.hpp.

◆ LayerType

enum LayerType
strong

When adding a new layer, also adapt the LastLayer enum value in the enum class LayerType below.

Enumerator
Activation 
Addition 
ArgMinMax 
BatchNormalization 
BatchToSpaceNd 
Comparison 
Concat 
Constant 
ConvertBf16ToFp32 
ConvertFp16ToFp32 
ConvertFp32ToBf16 
ConvertFp32ToFp16 
Convolution2d 
Debug 
DepthToSpace 
DepthwiseConvolution2d 
Dequantize 
DetectionPostProcess 
Division 
ElementwiseUnary 
FakeQuantization 
Fill 
Floor 
FullyConnected 
Gather 
Input 
InstanceNormalization 
L2Normalization 
LogicalBinary 
LogSoftmax 
Lstm 
QLstm 
Map 
Maximum 
Mean 
MemCopy 
MemImport 
Merge 
Minimum 
Multiplication 
Normalization 
Output 
Pad 
Permute 
Pooling2d 
PreCompiled 
Prelu 
Quantize 
QuantizedLstm 
Reshape 
Rank 
Resize 
Reduce 
Slice 
Softmax 
SpaceToBatchNd 
SpaceToDepth 
Splitter 
Stack 
StandIn 
StridedSlice 
Subtraction 
Switch 
Transpose 
TransposeConvolution2d 
Unmap 
Cast 
Shape 
UnidirectionalSequenceLstm 
ChannelShuffle 
Convolution3d 
Pooling3d 
GatherNd 
FirstLayer 
LastLayer 

Definition at line 467 of file Types.hpp.

enum class LayerType
{
#define X(name) name,
    LIST_OF_LAYER_TYPE
#undef X
};

LIST_OF_LAYER_TYPE uses the X macro technique; it is defined at line 388 of Types.hpp.

◆ LogicalBinaryOperation

Enumerator
LogicalAnd 
LogicalOr 

Definition at line 118 of file Types.hpp.

◆ LogSeverity

enum LogSeverity
strong
Enumerator
Trace 
Debug 
Info 
Warning 
Error 
Fatal 

Definition at line 14 of file Utils.hpp.

enum class LogSeverity
{
    Trace,
    Debug,
    Info,
    Warning,
    Error,
    Fatal
};

◆ MemBlockStrategyType

enum MemBlockStrategyType
strong
Enumerator
SingleAxisPacking 
MultiAxisPacking 

Definition at line 239 of file Types.hpp.

enum class MemBlockStrategyType
{
    // MemBlocks can be packed on the Y axis only, overlap allowed on X axis.
    // In other words MemBlocks with overlapping lifetimes cannot use the same MemBin,
    // equivalent to blob or pooling memory management.
    SingleAxisPacking = 0,

    // MemBlocks can be packed on either Y or X axis but cannot overlap on both.
    // In other words MemBlocks with overlapping lifetimes can use the same MemBin,
    // equivalent to offset or slab memory management.
    MultiAxisPacking = 1
};

◆ MemorySource

enum MemorySource : uint32_t
strong

Define the Memory Source to reduce copies.

Enumerator
Undefined 
Malloc 
DmaBuf 
DmaBufProtected 
Gralloc 

Definition at line 230 of file Types.hpp.

◆ NormalizationAlgorithmChannel

Enumerator
Across 
Within 

Definition at line 193 of file Types.hpp.

◆ NormalizationAlgorithmMethod

Enumerator
LocalBrightness 

Krichevsky 2012: Local Brightness Normalization.

LocalContrast 

Jarret 2009: Local Contrast Normalization.

Definition at line 199 of file Types.hpp.

enum class NormalizationAlgorithmMethod
{
    /// Krichevsky 2012: Local Brightness Normalization
    LocalBrightness = 0,
    /// Jarret 2009: Local Contrast Normalization
    LocalContrast = 1
};

◆ OutputShapeRounding

enum OutputShapeRounding
strong
Enumerator
Floor 
Ceiling 

Definition at line 207 of file Types.hpp.

◆ PaddingMethod

enum PaddingMethod
strong

The padding method modifies the output of pooling layers.

In both supported methods, the values are ignored (they are not even zeroes, which would make a difference for max pooling a tensor with negative values). The difference between IgnoreValue and Exclude is that the former counts the padding fields in the divisor of Average and L2 pooling, while Exclude does not.
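
As a concrete illustration of the divisor difference, take a hypothetical 2x2 average-pooling window in which one column lies in the padding region and the real values are 4 and 6:

// IgnoreValue: padding fields are counted in the divisor -> (4 + 6) / 4 = 2.5
// Exclude:     padding fields are not counted            -> (4 + 6) / 2 = 5.0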

Enumerator
IgnoreValue 

The padding fields count, but are ignored.

Exclude 

The padding fields don't count and are ignored.

Definition at line 174 of file Types.hpp.

enum class PaddingMethod
{
    /// The padding fields count, but are ignored
    IgnoreValue = 0,
    /// The padding fields don't count and are ignored
    Exclude = 1
};

◆ PaddingMode

enum PaddingMode
strong

The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect).

Enumerator
Constant 
Reflect 
Symmetric 

Definition at line 186 of file Types.hpp.

◆ PoolingAlgorithm

enum PoolingAlgorithm
strong
Enumerator
Max 
Average 
L2 

Definition at line 136 of file Types.hpp.

◆ ProfilingDetailsMethod

Define the behaviour of the internal profiler when outputting network details.

Enumerator
Undefined 
DetailsWithEvents 
DetailsOnly 

Definition at line 71 of file Types.hpp.

◆ QosExecPriority

enum QosExecPriority
strong
Enumerator
Low 
Medium 
High 

Definition at line 79 of file Types.hpp.

◆ ReduceOperation

enum ReduceOperation
strong
Enumerator
Sum 
Max 
Mean 
Min 
Prod 

Definition at line 143 of file Types.hpp.

◆ ResizeMethod

enum ResizeMethod
strong
Enumerator
Bilinear 
NearestNeighbor 

Definition at line 152 of file Types.hpp.

◆ ShapeInferenceMethod

enum ShapeInferenceMethod
strong

The ShapeInferenceMethod modifies how the output shapes are treated.

When ValidateOnly is selected, the output shapes are inferred from the input parameters of the layer and any mismatch is reported. When InferAndValidate is selected, two actions are performed: (1) infer the output shape from the inputs and (2) validate the shapes as in ValidateOnly. This option has been added to work with tensors whose rank or dimension sizes are not specified explicitly, although this information can be calculated from the inputs.
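
A sketch of requesting inference during optimization, assuming the OptimizerOptions member spelling used here (m_shapeInferenceMethod):

#include <armnn/INetwork.hpp>

armnn::OptimizerOptions options;
options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::InferAndValidate;  // member name assumed
// options can then be passed to armnn::Optimize() together with the network and backend preferences.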

Enumerator
ValidateOnly 

Validate all output shapes.

InferAndValidate 

Infer missing output shapes and validate all output shapes.

Definition at line 221 of file Types.hpp.

enum class ShapeInferenceMethod
{
    /// Validate all output shapes
    ValidateOnly = 0,
    /// Infer missing output shapes and validate all output shapes
    InferAndValidate = 1
};

◆ Status

enum Status
strong

enumeration

Enumerator
Success 
Failure 

Definition at line 42 of file Types.hpp.

◆ TuningLevel

enum TuningLevel
strong
Enumerator
None 
Rapid 
Normal 
Exhaustive 

Definition at line 70 of file ClBackendContext.cpp.

◆ UnaryOperation

enum UnaryOperation
strong
Enumerator
Abs 
Exp 
Sqrt 
Rsqrt 
Neg 
LogicalNot 
Log 
Sin 

Definition at line 124 of file Types.hpp.

Function Documentation

◆ Activation() [1/2]

float Activation ( float  in,
ActivationFunction  function,
float  a,
float  b 
)

Definition at line 13 of file Activation.cpp.

References Abs, BoundedReLu, Elu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by Activation(), LstmImpl(), and TEST_SUITE().

17 {
18  float output;
19 
20  // Compute the result of the activation function.
21  switch (function)
22  {
23  case ActivationFunction::Linear:
24  {
25  output = a * in + b;
26  break;
27  }
28  case ActivationFunction::Sigmoid:
29  {
30  output = 1.f / (1.f + expf(-in));
31  break;
32  }
33  case ActivationFunction::ReLu:
34  {
35  output = std::max(0.f, in);
36  break;
37  }
38  case ActivationFunction::BoundedReLu:
39  {
40  output = std::min(a, std::max(b, in));
41  break;
42  }
43  case ActivationFunction::SoftReLu:
44  {
45  output = logf(1.0f + expf(in));
46  break;
47  }
48  case ActivationFunction::LeakyReLu:
49  {
50  output = in > 0.0f ? in : (in * a);
51  break;
52  }
53  case ActivationFunction::Abs:
54  {
55  output = in < 0 ? -in : in;
56  break;
57  }
58  case ActivationFunction::Sqrt:
59  {
60  output = sqrtf(in);
61  break;
62  }
63  case ActivationFunction::Square:
64  {
65  output = in * in;
66  break;
67  }
68  case ActivationFunction::TanH:
69  {
70  output = a * tanhf(b * in);
71  break;
72  }
73  case ActivationFunction::Elu:
74  {
75  output = (in >= 0) ? in : a * (expf(in) - 1);
76  break;
77  }
78  case ActivationFunction::HardSwish:
79  {
80  // hard_swish(x) = x * relu6(x+3) / 6
81  // relu6(x) = min(max(x,0),6)
82  output = in * (std::min(std::max((in + 3),0.0f),6.0f)) / 6;
83  break;
84  }
85  default:
86  {
87  throw InvalidArgumentException("Unsupported activation function");
88  }
89  }
90 
91  return output;
92 }

◆ Activation() [2/2]

void Activation ( Decoder< float > &  in,
Encoder< float > &  out,
const TensorInfo &  tensorInfo,
ActivationFunction  function,
float  a,
float  b 
)

Definition at line 95 of file Activation.cpp.

References Activation(), Decoder< IType >::Get(), TensorInfo::GetNumElements(), and Encoder< IType >::Set().

101 {
102  unsigned int numElements = tensorInfo.GetNumElements();
103 
104  for (unsigned int i = 0; i < numElements; i++)
105  {
106  out.Set(Activation(in.Get(), function, a, b));
107  ++in;
108  ++out;
109  }
110  in -= numElements;
111  out -= numElements;
112 }

◆ AllocateOutputData()

void armnn::AllocateOutputData ( unsigned int  numOutput,
unsigned int  numSelected,
const std::vector< float > &  boxCorners,
const std::vector< unsigned int > &  outputIndices,
const std::vector< unsigned int > &  selectedBoxes,
const std::vector< unsigned int > &  selectedClasses,
const std::vector< float > &  selectedScores,
float *  detectionBoxes,
float *  detectionScores,
float *  detectionClasses,
float *  numDetections 
)

Definition at line 102 of file DetectionPostProcess.cpp.

References numeric_cast().

Referenced by DetectionPostProcess().

113 {
114  for (unsigned int i = 0; i < numOutput; ++i)
115  {
116  unsigned int boxIndex = i * 4;
117  if (i < numSelected)
118  {
119  unsigned int boxCornorIndex = selectedBoxes[outputIndices[i]] * 4;
120  detectionScores[i] = selectedScores[outputIndices[i]];
121  detectionClasses[i] = armnn::numeric_cast<float>(selectedClasses[outputIndices[i]]);
122  detectionBoxes[boxIndex] = boxCorners[boxCornorIndex];
123  detectionBoxes[boxIndex + 1] = boxCorners[boxCornorIndex + 1];
124  detectionBoxes[boxIndex + 2] = boxCorners[boxCornorIndex + 2];
125  detectionBoxes[boxIndex + 3] = boxCorners[boxCornorIndex + 3];
126  }
127  else
128  {
129  detectionScores[i] = 0.0f;
130  detectionClasses[i] = 0.0f;
131  detectionBoxes[boxIndex] = 0.0f;
132  detectionBoxes[boxIndex + 1] = 0.0f;
133  detectionBoxes[boxIndex + 2] = 0.0f;
134  detectionBoxes[boxIndex + 3] = 0.0f;
135  }
136  }
137  numDetections[0] = armnn::numeric_cast<float>(numSelected);
138 }

◆ AllTypesAreEqualImpl() [1/2]

bool armnn::AllTypesAreEqualImpl ( )

Definition at line 59 of file LayerSupportRules.hpp.

Referenced by AllTypesAreEqualImpl(), and TypesAreEqual::TypesAreEqual().

60 {
61  return true;
62 }

◆ AllTypesAreEqualImpl() [2/2]

bool armnn::AllTypesAreEqualImpl ( T  t1,
T  t2,
Rest...  rest 
)

Definition at line 65 of file LayerSupportRules.hpp.

References AllTypesAreEqualImpl().

66 {
67  static_assert(std::is_same<T, TensorInfo>::value, "Type T must be a TensorInfo");
68 
69  return (t1.GetDataType() == t2.GetDataType()) && AllTypesAreEqualImpl(t2, rest...);
70 }

◆ Append() [1/2]

void armnn::Append ( Optimizer::Optimizations &  optimizations,
T &&  optimization 
)

Definition at line 30 of file Optimizer.hpp.

Referenced by Append(), and MakeOptimizations().

31 {
32  optimizations.emplace_back(new T(optimization));
33 };

◆ Append() [2/2]

void armnn::Append ( Optimizer::Optimizations &  optimizations,
Front &&  front,
Others &&...  others 
)

Definition at line 36 of file Optimizer.hpp.

References Append().

37 {
38  Append<Front>(optimizations, std::forward<Front>(front));
39  Append<Others...>(optimizations, std::forward<Others>(others)...);
40 };

◆ ApplyBackendOptimizations()

OptimizationResult armnn::ApplyBackendOptimizations ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
BackendsMap &  backends,
const ModelOptions &  modelOptions,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1139 of file Network.cpp.

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, AssignBackends(), CpuAcc, Layer::GetBackendId(), OptimizedNetworkImpl::GetGraph(), SubgraphView::GetIConnectableLayers(), Layer::GetType(), GpuAcc, Input, OptimizationResult::m_Error, BackendSettings::m_SelectedBackends, MakeOptimizations(), Output, Optimizer::Pass(), ReportWarning(), SubgraphViewSelector::SelectSubgraphs(), Graph::SubstituteSubgraph(), and Undefined.

Referenced by Optimize().

1144 {
1145  ARMNN_ASSERT(optNetObjPtr);
1146  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ApplyBackendOptimizations")
1147  OptimizationResult result;
1148 
1149  // Get the optimized graph
1150  Graph& optGraph = optNetObjPtr->GetGraph();
1151 
1152  // Run backend specific optimizations
1153  for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
1154  {
1155  auto backendObjPtr = backends.find(selectedBackend)->second.get();
1156  ARMNN_ASSERT(backendObjPtr);
1157 
1158  if(selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
1159  {
1161  Optimizer::Pass(optGraph, MakeOptimizations(optimizations::FusePermuteIntoConstLayer()));
1162  }
1163 
1164  // Select sub-graphs based on backend
1165  SubgraphViewSelector::Subgraphs subgraphs =
1166  SubgraphViewSelector::SelectSubgraphs(optGraph,
1167  // Select layers assigned to the requested backend
1168  [&backendObjPtr](const Layer& layer)
1169  {
1170 
1171  return layer.GetType() != LayerType::Input &&
1172  layer.GetType() != LayerType::Output &&
1173  layer.GetBackendId() == backendObjPtr->GetId();
1174  });
1175  if (subgraphs.empty())
1176  {
1177  // No sub-graphs found, try with next selected backend
1178  continue;
1179  }
1180 
1181  // Try to optimize each sub-graph
1182  for (auto& subgraph : subgraphs)
1183  {
1184  // Try to optimize the current sub-graph
1185  ARMNN_SCOPED_PROFILING_EVENT(backendObjPtr->GetId(), "Optimizer_OptimizeSubgraph");
1186  OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph, modelOptions);
1187  ARMNN_ASSERT(optimizationViews.Validate(*subgraph));
1188 
1189  // Optimization attempted, check the resulting optimized sub-graph
1190  for (auto& substitution : optimizationViews.GetSubstitutions())
1191  {
1192  // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main optimized graph
1193  SubgraphView& replacementSubgraph = substitution.m_ReplacementSubgraph;
1194  SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
1195  optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);
1196 
1197  // Assign the current backend to the optimized sub-graph
1198  const SubgraphView::IConnectableLayers& subgraphLayers = replacementSubgraph.GetIConnectableLayers();
1199  std::for_each(subgraphLayers.begin(), subgraphLayers.end(), [&selectedBackend](IConnectableLayer* l)
1200  {
1201  ARMNN_ASSERT(l);
1202  PolymorphicDowncast<Layer*>(l)->SetBackendId(selectedBackend);
1203  });
1204  }
1205 
1206  if (!optimizationViews.GetFailedSubgraphs().empty())
1207  {
1208  std::stringstream warningMsg;
1209  warningMsg << "Some sub-graph(s) failed to optimized on " << backendObjPtr->GetId() << " backend.";
1210  ReportWarning(warningMsg.str(), errMessages);
1211 
1212  // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
1213  BackendSettings settingsCopy(backendSettings);
1214  if (!backendObjPtr->GetId().IsCpuRef())
1215  {
1216  // Add the current backend to the list of backends to ignore
1217  settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
1218  }
1219 
1220  int count=0;
1221  for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
1222  {
1223  // An error occurred: the optimization was attempted but not performed, try different backends
1224  std::stringstream subgraphMsg;
1225  subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetIConnectableLayers().size()
1226  << " layers inside sub-graph " << count++;
1227  ReportWarning(subgraphMsg.str(), errMessages);
1228 
1229  OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
1230  settingsCopy,
1231  *subgraph,
1232  errMessages);
1233  if (reassignmentResult.m_Error)
1234  {
1235  // Failed to re-assign one of the remaining backends to each layer of the sub-graph
1236  result.m_Error = true;
1237  return result;
1238  }
1239  }
1240  }
1241  }
1242  }
1243 
1244  return result;
1245 }
OptimizeForType< Layer, PermuteDepthwiseConv2dWeightsImpl > PermuteDepthwiseConv2dWeights
Optimizer::Optimizations MakeOptimizations(Args &&... args)
Definition: Optimizer.hpp:43
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
OptimizeForConnection< ConstantLayer, PermuteLayer, ConvertConstPermuteLayersToConstLayers > FusePermuteIntoConstLayer
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
GPU Execution: OpenCL: ArmCompute.
CPU Execution: NEON: ArmCompute.
void ReportWarning(const std::string &warningMessage, Optional< std::vector< std::string > &> warningMessages)
Definition: Network.cpp:568
OptimizationResult AssignBackends(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1106
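The machinery above is normally driven through the public armnn::Optimize() entry point rather than being called directly. Below is a minimal sketch, assuming a trivial input-to-output network and a runtime with at least the CpuRef backend available; backends in the preference list that are unavailable, or that fail to optimize a sub-graph, only add warnings to the messages vector while the affected layers fall back to the next backend.

#include <armnn/ArmNN.hpp>
#include <iostream>

int main()
{
    using namespace armnn;

    // Create a runtime and a trivial pass-through network.
    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* input  = network->AddInputLayer(0);
    IConnectableLayer* output = network->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({1, 4}, DataType::Float32));

    // Backend preferences: GpuAcc and CpuAcc are tried first, CpuRef is the fallback.
    std::vector<std::string> messages; // collects warnings reported via ReportWarning()
    IOptimizedNetworkPtr optNet = Optimize(*network,
                                           {Compute::GpuAcc, Compute::CpuAcc, Compute::CpuRef},
                                           runtime->GetDeviceSpec(),
                                           OptimizerOptions(),
                                           Optional<std::vector<std::string>&>(messages));

    for (const std::string& msg : messages) { std::cout << msg << std::endl; }
    return optNet ? 0 : 1;
}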

◆ ArgMinMax() [1/3]

void ArgMinMax ( Decoder< float > &  in,
OUT *  out,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

Definition at line 16 of file ArgMinMax.cpp.

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), armnnUtils::GetUnsignedAxis(), IgnoreUnused(), Max, Min, and numeric_cast().

Referenced by TEST_SUITE().

18 {
19  IgnoreUnused(outputTensorInfo);
20 
21  unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis);
22 
23  const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis);
24  const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis];
25  const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(),
26  uAxis + 1,
27  inputTensorInfo.GetNumDimensions());
28 
29  for (unsigned int outer = 0; outer < outerElements; ++outer) {
30  for (unsigned int inner = 0; inner < innerElements; ++inner) {
31  in[outer * axisSize * innerElements + inner];
32  auto tmpValue = in.Get();
33  unsigned int tmpIndex = 0;
34  for (unsigned int i = 1; i < axisSize; ++i) {
35  in[(outer * axisSize * innerElements) + (i * innerElements) + inner];
36  const auto& value = in.Get();
37  if ((function == armnn::ArgMinMaxFunction::Min && value < tmpValue) ||
38  (function == armnn::ArgMinMaxFunction::Max && value > tmpValue)) {
39  tmpValue = value;
40  tmpIndex = i;
41  }
42  }
43 
44  out[outer * innerElements + inner] = armnn::numeric_cast<OUT>(tmpIndex);
45  }
46  }
47 }
unsigned int GetNumElementsBetween(const armnn::TensorShape &shape, unsigned int firstAxisInclusive, unsigned int lastAxisExclusive)
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
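As a worked example of the indexing above: for an input of shape [2, 3, 4] with axis = 1, uAxis is 1, outerElements is 2, axisSize is 3 and innerElements is 4. For each (outer, inner) pair the inner loop compares the three values at offsets (outer * 12) + (i * 4) + inner for i = 0..2, and the index of the winning value along the axis is written to out[outer * 4 + inner], so the output has shape [2, 4] with the reduced axis removed.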

◆ ArgMinMax() [2/3]

template void armnn::ArgMinMax ( Decoder< float > &  in,
int32_t *  out,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

◆ ArgMinMax() [3/3]

template void armnn::ArgMinMax ( Decoder< float > &  in,
int64_t *  out,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

◆ ARMNN_DEPRECATED_MSG_REMOVAL_DATE() [1/2]

class armnn::ARMNN_DEPRECATED_MSG_REMOVAL_DATE ( "Use ABI stable IStrategy instead."  ,
"22.05"   
)

Function that an activation layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
activationDescriptor- ActivationDescriptor to configure the activation.
name- Optional name for the layer.

Function that an addition layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that an arg min max layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
argMinMaxDescriptor- ArgMinMaxDescriptor to configure the arg min max operation.
name- Optional name for the layer.

Function that a batch normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
mean- Pre-calculated mean for each channel.
variance- Pre-calculated variance for each channel.
beta- Per-channel additive factor.
gamma- Per-channel multiplicative factor.
name- Optional name for the layer.

Function that a batch to space ND layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
batchToSpaceNdDescriptor- Description of the layer.
name- Optional name for the layer.

Function a Comparison layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
comparisonDescriptor- Description of the layer.
name- Optional name for the layer.

Function that a concat layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
concatDescriptor- ConcatDescriptor (synonym for OriginsDescriptor) to configure the concatenation process. Number of Views must be equal to the number of inputs, and their order must match - e.g. first view corresponds to the first input, second view to the second input, etc....
name- Optional name for the layer.

Function a layer with no inputs and a single output, which always corresponds to the passed in constant tensor should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
input- Tensor to be provided as the only output of the layer. The layer will maintain its own copy of the tensor data, meaning the memory referenced by input can be freed or reused after this function is called.
name- Optional name for the layer.

Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
convolution2dDescriptor- Description of the 2D convolution layer.
name- Optional name for the layer.

Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
convolution2dDescriptor- Description of the 2D convolution layer.
weights- Tensor for the weights data.
biases- Optional tensor for the bias data. If specified, must match the output tensor shape.
name- Optional name for the layer.

Function a depth to space layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
depthToSpaceDescriptor- Parameters for the depth to space operation.
name- Optional name for the layer.

Function that a 2D depthwise convolution layer with biases should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
convolution2dDescriptor- Description of the 2D depthwise convolution layer.
name- Optional name for the layer.

Function that a 2D depthwise convolution layer with biases should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
convolution2dDescriptor- Description of the 2D depthwise convolution layer.
weights- Tensor for the weights. Expected format: [channelMultiplier, inputChannels, height, width].
biases- Optional tensor for the bias data. If specified, must match the output tensor shape.
name- Optional name for the layer.

Function that a Dequantize layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a Detection PostProcess layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
descriptor- Description of the Detection PostProcess layer.
anchors- Tensor for the anchors.
name- Optional name for the layer.

Function a division layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a ElementwiseUnary layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
elementwiseUnaryDescriptor- Description of the layer.
name- Optional name for the layer.

Function a fill layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
fillDescriptor- Description of the layer
name- Optional name for the layer.

Function a floor layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a fully connected layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
fullyConnectedDescriptor- Description of the fully connected layer.
name- Optional name for the layer.

Function that a fully connected layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
fullyConnectedDescriptor- Description of the fully connected layer.
weights- Tensor for the weights data.
biases- Optional tensor for the bias data.
name- Optional name for the layer.

Function a Gather layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
gatherDescriptor- Parameters for the gather operation.
name- Optional name for the layer.

Function that an InputLayer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
id- User generated id to uniquely identify a particular input. The same id needs to be specified when passing the inputs to the IRuntime::EnqueueWorkload() function.
name- Optional name for the layer.

Function that an instance normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
desc- Parameters for the instance normalization operation.
name- Optional name for the layer.

Function that an L2 normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked. Normalization is performed along dimension 1, but requires a 4d input.

Parameters
layer- pointer to the layer which is calling back to this visit function.
desc- Parameters for the L2 normalization operation.
name- Optional name for the layer.

Function that a log softmax layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
logSoftmaxDescriptor- LogSoftmaxDescriptor to configure the log softmax.
name- Optional name for the layer.

Function that a logical binary layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
logicalBinaryDescriptor- LogicalBinaryDescriptor to configure the logical binary layer.
name- Optional name for the layer.

Function an Lstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
descriptor- Parameters controlling the operation of the Lstm operation.
params- The weights and biases for the LSTM cell.
name- Optional name for the layer.

Function a Maximum layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a Mean layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
meanDescriptor- Parameters for the mean operation.
name- Optional name for the layer.

Function that a merge layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a Minimum layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a multiplication layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
normalizationDescriptor- NormalizationDescriptor to configure the normalization.
name- Optional name for the layer.

Function an output layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
id- User generated id to uniquely identify a particular output. The same id needs to be specified when passing the outputs to the IRuntime::EnqueueWorkload() function.
name- Optional name for the layer.

Function a pad layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
paddings- n by 2 tensor, where n is the rank of the input tensor, such that paddings[i,0] indicates the amount of padding to add in front of dimension i, and paddings[i,1] indicates the amount of padding to add after the end of dimension i
name- Optional name for the layer.

Function that a permute layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
permuteDescriptor- PermuteDescriptor to configure the permute.
name- Optional name for the layer.

Function that a pooling layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
pooling2dDescriptor- Pooling2dDescriptor to configure the pooling.
name- Optional name for the layer.

Function that a pooling layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
pooling3dDescriptor- Pooling3dDescriptor to configure the pooling.
name- Optional name for the layer.

Function that a PReLU activation layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a quantize layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a QLstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
descriptor- Parameters controlling the operation of the QLstm operation.
params- The weights and biases for the layer
name- Optional name for the layer.

Function a QuantizedLstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
params- The weights and biases for the Quantized LSTM cell
name- Optional name for the layer.

Function a rank layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a reduce layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
reduceDescriptor- Parameters for the reduce operation.
name- Optional name for the layer.

Function a reshape layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
reshapeDescriptor- Parameters for the reshape operation.
name- Optional name for the layer.

Function that a resize layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
resizeDescriptor- Parameters for the resize operation.
name- Optional name for the layer.

Function that a slice layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
sliceDescriptor- SliceDescriptor to configure the slice operation.
name- Optional name for the layer.

Function that a softmax layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
softmaxDescriptor- SoftmaxDescriptor to configure the softmax.
name- Optional name for the layer.

Function a space to batch layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
spaceToBatchNdDescriptor- Parameters for the space to batch operation.
name- Optional name for the layer.

Function a space to depth layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
spaceToDepthDescriptor- Parameters for the space to depth operation.
name- Optional name for the layer.

Function that a splitter layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
splitterDescriptor- ViewsDescriptor to configure the splitting process. Number of Views must be equal to the number of outputs, and their order must match - e.g. first view corresponds to the first output, second view to the second output, etc....
name- Optional name for the layer.

Function a stack layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
stackDescriptor- Parameters for the stack operation.
name- Optional name for the layer.

Function a StandInLayer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
standInDescriptor- Parameters for the stand-in layer.
name- Optional name for the layer.

Function a strided slice layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
stridedSliceDescriptor- Parameters for the strided slice operation.
name- Optional name for the layer.

Function a subtraction layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a switch layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a 2D transpose convolution layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
descriptor- Description of the 2D transpose convolution layer.
weights- Tensor for the weights data.
biases- Optional tensor for the bias data.
name- Optional name for the layer.

Function that a transpose layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
transposeDescriptor- TransposeDescriptor to configure the transpose.
name- Optional name for the layer.

Definition at line 16 of file ILayerVisitor.hpp.

References ARMNN_DEPRECATED_MSG, and ARMNN_DEPRECATED_MSG_REMOVAL_DATE().

17 {
18 protected:
19  ILayerVisitor() {}
20  virtual ~ILayerVisitor() {}
21 
22 public:
23 
24  /// Function that an activation layer should call back to when its Accept(ILayerVisitor&) function is invoked.
25  /// @param layer - pointer to the layer which is calling back to this visit function.
26  /// @param activationDescriptor - ActivationDescriptor to configure the activation.
27  /// @param name - Optional name for the layer.
28  virtual void VisitActivationLayer(const IConnectableLayer* layer,
29  const ActivationDescriptor& activationDescriptor,
30  const char* name = nullptr) = 0;
31 
32  /// Function that an addition layer should call back to when its Accept(ILayerVisitor&) function is invoked.
33  /// @param layer - pointer to the layer which is calling back to this visit function.
34  /// @param name - Optional name for the layer.
35  virtual void VisitAdditionLayer(const IConnectableLayer* layer,
36  const char* name = nullptr) = 0;
37 
38  /// Function that an arg min max layer should call back to when its Accept(ILayerVisitor&) function is invoked.
39  /// @param layer - pointer to the layer which is calling back to this visit function.
40  /// @param argMinMaxDescriptor - ArgMinMaxDescriptor to configure the arg min max operation.
41  /// @param name - Optional name for the layer.
42  virtual void VisitArgMinMaxLayer(const IConnectableLayer* layer,
43  const ArgMinMaxDescriptor& argMinMaxDescriptor,
44  const char* name = nullptr) = 0;
45 
46  /// Function that a batch normalization layer should call back to when its Accept(ILayerVisitor&)
47  /// function is invoked.
48  /// @param layer - pointer to the layer which is calling back to this visit function.
49  /// @param mean - Pre-calculated mean for each channel.
50  /// @param variance - Pre-calculated variance for each channel.
51  /// @param beta - Per-channel additive factor.
52  /// @param gamma - Per-channel multiplicative factor.
53  /// @param name - Optional name for the layer.
54  virtual void VisitBatchNormalizationLayer(const IConnectableLayer* layer,
55  const BatchNormalizationDescriptor& desc,
56  const ConstTensor& mean,
57  const ConstTensor& variance,
58  const ConstTensor& beta,
59  const ConstTensor& gamma,
60  const char* name = nullptr) = 0;
61 
62  /// Function that a batch to space ND layer should call back to when its Accept(ILayerVisitor&)
63  /// function is invoked.
64  /// @param layer - pointer to the layer which is calling back to this visit function.
65  /// @param batchToSpaceNdDescriptor - Description of the layer.
66  /// @param name - Optional name for the layer.
67  virtual void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
68  const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
69  const char* name = nullptr) = 0;
70 
71  /// Function a Comparison layer should call back to when its Accept(ILayerVisitor&) function is invoked.
72  /// @param layer - pointer to the layer which is calling back to this visit function.
73  /// @param comparisonDescriptor - Description of the layer.
74  /// @param name - Optional name for the layer.
75  virtual void VisitComparisonLayer(const IConnectableLayer* layer,
76  const ComparisonDescriptor& comparisonDescriptor,
77  const char* name = nullptr) = 0;
78 
79  /// Function that a concat layer should call back to when its Accept(ILayerVisitor&) function is invoked.
80  /// @param layer - pointer to the layer which is calling back to this visit function.
81  /// @param concatDescriptor - ConcatDescriptor (synonym for OriginsDescriptor) to configure the concatenation
82  /// process. Number of Views must be equal to the number of inputs, and their order
83  /// must match - e.g. first view corresponds to the first input, second view to the
84  /// second input, etc....
85  /// @param name - Optional name for the layer.
86  virtual void VisitConcatLayer(const IConnectableLayer* layer,
87  const OriginsDescriptor& concatDescriptor,
88  const char* name = nullptr) = 0;
89 
90  /// Function a layer with no inputs and a single output, which always corresponds to
91  /// the passed in constant tensor should call back to when its Accept(ILayerVisitor&) function is invoked.
92  /// @param layer - pointer to the layer which is calling back to this visit function.
93  /// @param input - Tensor to be provided as the only output of the layer. The layer will maintain
94  /// its own copy of the tensor data, meaning the memory referenced by @a input can
95  /// be freed or reused after this function is called.
96  /// @param name - Optional name for the layer.
97  virtual void VisitConstantLayer(const IConnectableLayer* layer,
98  const ConstTensor& input,
99  const char* name = nullptr) = 0;
100 
101  /// Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&)
102  /// function is invoked.
103  /// @param layer - pointer to the layer which is calling back to this visit function.
104  /// @param convolution2dDescriptor - Description of the 2D convolution layer.
105  /// @param name - Optional name for the layer.
106  virtual void VisitConvolution2dLayer(const IConnectableLayer* layer,
107  const Convolution2dDescriptor& convolution2dDescriptor,
108  const char* name = nullptr) = 0;
109 
110  /// Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&)
111  /// function is invoked.
112  /// @param layer - pointer to the layer which is calling back to this visit function.
113  /// @param convolution2dDescriptor - Description of the 2D convolution layer.
114  /// @param weights - Tensor for the weights data.
115  /// @param biases - Optional tensor for the bias data. If specified, must match the output tensor shape.
116  /// @param name - Optional name for the layer.
117  ARMNN_DEPRECATED_MSG("Use VisitConvolution2dLayer without ConstTensors")
118  virtual void VisitConvolution2dLayer(const IConnectableLayer* layer,
119  const Convolution2dDescriptor& convolution2dDescriptor,
120  const ConstTensor& weights,
121  const Optional<ConstTensor>& biases,
122  const char* name = nullptr) = 0;
123 
124  /// Function a depth to space layer should call back to when its Accept(ILayerVisitor&) function is invoked.
125  /// @param layer - pointer to the layer which is calling back to this visit function.
126  /// @param depthToSpaceDescriptor - Parameters for the depth to space operation.
127  /// @param name - Optional name for the layer.
128  virtual void VisitDepthToSpaceLayer(const IConnectableLayer* layer,
129  const DepthToSpaceDescriptor& depthToSpaceDescriptor,
130  const char* name = nullptr) = 0;
131 
132  /// Function that a 2D depthwise convolution layer with biases should call back to when its
133  /// Accept(ILayerVisitor&) function is invoked.
134  /// @param layer - pointer to the layer which is calling back to this visit function.
135  /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer.
136  /// @param name - Optional name for the layer.
137  virtual void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
138  const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
139  const char* name = nullptr) = 0;
140 
141  /// Function that a 2D depthwise convolution layer with biases should call back to when its
142  /// Accept(ILayerVisitor&) function is invoked.
143  /// @param layer - pointer to the layer which is calling back to this visit function.
144  /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer.
145  /// @param weights - Tensor for the weights. Expected format: [channelMultiplier, inputChannels, height, width].
146  /// @param biases - Optional tensor for the bias data. If specified, must match the output tensor shape.
147  /// @param name - Optional name for the layer.
148  ARMNN_DEPRECATED_MSG("Use VisitDepthwiseConvolution2dLayer without ConstTensors")
149  virtual void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
150  const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
151  const ConstTensor& weights,
152  const Optional<ConstTensor>& biases,
153  const char* name = nullptr) = 0;
154 
155  /// Function that a Dequantize layer should call back to when its
156  /// Accept(ILayerVisitor&) function is invoked.
157  /// @param layer - pointer to the layer which is calling back to this visit function.
158  /// @param name - Optional name for the layer.
159  virtual void VisitDequantizeLayer(const IConnectableLayer* layer,
160  const char* name = nullptr) = 0;
161 
162  /// Function that a Detection PostProcess layer should call back to when its
163  /// Accept(ILayerVisitor&) function is invoked.
164  /// @param layer - pointer to the layer which is calling back to this visit function.
165  /// @param descriptor - Description of the Detection PostProcess layer.
166  /// @param anchors - Tensor for the anchors.
167  /// @param name - Optional name for the layer.
168  virtual void VisitDetectionPostProcessLayer(const IConnectableLayer* layer,
169  const DetectionPostProcessDescriptor& descriptor,
170  const ConstTensor& anchors,
171  const char* name = nullptr) = 0;
172 
173  /// Function a division layer should call back to when its Accept(ILayerVisitor&) function is invoked.
174  /// @param layer - pointer to the layer which is calling back to this visit function.
175  /// @param name - Optional name for the layer.
176  virtual void VisitDivisionLayer(const IConnectableLayer* layer,
177  const char* name = nullptr) = 0;
178 
179  /// Function a ElementwiseUnary layer should call back to when its Accept(ILayerVisitor&) function is invoked.
180  /// @param layer - pointer to the layer which is calling back to this visit function.
181  /// @param elementwiseUnaryDescriptor - Description of the layer.
182  /// @param name - Optional name for the layer.
183  virtual void VisitElementwiseUnaryLayer(const IConnectableLayer* layer,
184  const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor,
185  const char* name = nullptr) = 0;
186 
187  /// Function a fill layer should call back to when its Accept(ILayerVisitor&) function is invoked.
188  /// @param layer - pointer to the layer which is calling back to this visit function.
189  /// @param fillDescriptor - Description of the layer
190  /// @param name - Optional name for the layer.
191  virtual void VisitFillLayer(const IConnectableLayer* layer,
192  const FillDescriptor& fillDescriptor,
193  const char* name = nullptr) = 0;
194 
195  /// Function a floor layer should call back to when its Accept(ILayerVisitor&) function is invoked.
196  /// @param layer - pointer to the layer which is calling back to this visit function.
197  /// @param name - Optional name for the layer.
198  virtual void VisitFloorLayer(const IConnectableLayer* layer,
199  const char* name = nullptr) = 0;
200 
201 
202  /// Function that a fully connected layer should call back to when its Accept(ILayerVisitor&)
203  /// function is invoked.
204  /// @param layer - pointer to the layer which is calling back to this visit function.
205  /// @param fullyConnectedDescriptor - Description of the fully connected layer.
206  /// @param name - Optional name for the layer.
207  virtual void VisitFullyConnectedLayer(const IConnectableLayer* layer,
208  const FullyConnectedDescriptor& fullyConnectedDescriptor,
209  const char* name = nullptr) = 0;
210 
211  /// Function that a fully connected layer should call back to when its Accept(ILayerVisitor&)
212  /// function is invoked.
213  /// @param layer - pointer to the layer which is calling back to this visit function.
214  /// @param fullyConnectedDescriptor - Description of the fully connected layer.
215  /// @param weights - Tensor for the weights data.
216  /// @param biases - Optional tensor for the bias data.
217  /// @param name - Optional name for the layer.
218  ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use VisitFullyConnectedLayer without ConstTensors", "22.05")
219  virtual void VisitFullyConnectedLayer(const IConnectableLayer* layer,
220  const FullyConnectedDescriptor& fullyConnectedDescriptor,
221  const ConstTensor& weights,
222  const Optional<ConstTensor>& biases,
223  const char* name = nullptr) = 0;
224 
225  /// Function a Gather layer should call back to when its Accept(ILayerVisitor&) function is invoked.
226  /// @param layer - pointer to the layer which is calling back to this visit function.
227  /// @param gatherDescriptor - Parameters for the gather operation.
228  /// @param name - Optional name for the layer.
229  virtual void VisitGatherLayer(const IConnectableLayer* layer,
230  const GatherDescriptor& gatherDescriptor,
231  const char* name = nullptr) = 0;
232 
233  /// Function that an InputLayer should call back to when its Accept(ILayerVisitor&) function is invoked.
234  /// @param layer - pointer to the layer which is calling back to this visit function.
235  /// @param id - User generated id to uniquely identify a particular input. The same id needs to be specified
236  /// when passing the inputs to the IRuntime::EnqueueWorkload() function.
237  /// @param name - Optional name for the layer.
238  virtual void VisitInputLayer(const IConnectableLayer* layer,
239  LayerBindingId id,
240  const char* name = nullptr) = 0;
241 
242  /// Function that an instance normalization layer should call back to when its Accept(ILayerVisitor&)
243  /// function is invoked.
244  /// @param layer - pointer to the layer which is calling back to this visit function.
245  /// @param desc - Parameters for the instance normalization operation.
246  /// @param name - Optional name for the layer.
247  virtual void VisitInstanceNormalizationLayer(const IConnectableLayer* layer,
248  const InstanceNormalizationDescriptor& desc,
249  const char* name = nullptr) = 0;
250 
251  /// Function that an L2 normalization layer should call back to when its Accept(ILayerVisitor&)
252  /// function is invoked. Normalization is performed along dimension 1, but requires a 4d input.
253  /// @param layer - pointer to the layer which is calling back to this visit function.
254  /// @param desc - Parameters for the L2 normalization operation.
255  /// @param name - Optional name for the layer.
256  virtual void VisitL2NormalizationLayer(const IConnectableLayer* layer,
257  const L2NormalizationDescriptor& desc,
258  const char* name = nullptr) = 0;
259 
260  /// Function that a log softmax layer should call back to when its Accept(ILayerVisitor&) function is invoked.
261  /// @param layer - pointer to the layer which is calling back to this visit function.
262  /// @param logSoftmaxDescriptor - LogSoftmaxDescriptor to configure the log softmax.
263  /// @param name - Optional name for the layer.
264  virtual void VisitLogSoftmaxLayer(const IConnectableLayer* layer,
265  const LogSoftmaxDescriptor& logSoftmaxDescriptor,
266  const char* name = nullptr) = 0;
267 
268  /// Function that a logical binary layer should call back to when its Accept(ILayerVisitor&) function is invoked.
269  /// @param layer - pointer to the layer which is calling back to this visit function.
270  /// @param logicalBinaryDescriptor - LogicalBinaryDescriptor to configure the logical binary layer.
271  /// @param name - Optional name for the layer.
272  virtual void VisitLogicalBinaryLayer(const IConnectableLayer* layer,
273  const LogicalBinaryDescriptor& logicalBinaryDescriptor,
274  const char* name = nullptr) = 0;
275 
276  /// Function an Lstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.
277  /// @param layer - pointer to the layer which is calling back to this visit function.
278  /// @param descriptor - Parameters controlling the operation of the Lstm operation.
279  /// @param params - The weights and biases for the LSTM cell.
280  /// @param name - Optional name for the layer.
281  virtual void VisitLstmLayer(const IConnectableLayer* layer,
282  const LstmDescriptor& descriptor,
283  const LstmInputParams& params,
284  const char* name = nullptr) = 0;
285 
286  /// Function a Maximum layer should call back to when its Accept(ILayerVisitor&) function is invoked.
287  /// @param layer - pointer to the layer which is calling back to this visit function.
288  /// @param name - Optional name for the layer.
289  virtual void VisitMaximumLayer(const IConnectableLayer* layer,
290  const char* name = nullptr) = 0;
291 
292  /// Function a Mean layer should call back to when its Accept(ILayerVisitor&) function is invoked.
293  /// @param layer - pointer to the layer which is calling back to this visit function.
294  /// @param meanDescriptor - Parameters for the mean operation.
295  /// @param name - Optional name for the layer.
296  virtual void VisitMeanLayer(const IConnectableLayer* layer,
297  const MeanDescriptor& meanDescriptor,
298  const char* name = nullptr) = 0;
299 
300  /// Function that a merge layer should call back to when its Accept(ILayerVisitor&) function is invoked.
301  /// @param layer - pointer to the layer which is calling back to this visit function.
302  /// @param name - Optional name for the layer.
303  virtual void VisitMergeLayer(const IConnectableLayer* layer,
304  const char* name = nullptr) = 0;
305 
306  /// Function a Minimum layer should call back to when its Accept(ILayerVisitor&) function is invoked.
307  /// @param layer - pointer to the layer which is calling back to this visit function.
308  /// @param name - Optional name for the layer.
309  virtual void VisitMinimumLayer(const IConnectableLayer* layer,
310  const char* name = nullptr) = 0;
311 
312  /// Function that a multiplication layer should call back to when its Accept(ILayerVisitor&) function is invoked.
313  /// @param layer - pointer to the layer which is calling back to this visit function.
314  /// @param name - Optional name for the layer.
315  virtual void VisitMultiplicationLayer(const IConnectableLayer* layer,
316  const char* name = nullptr) = 0;
317 
318  /// Function that a normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked.
319  /// @param layer - pointer to the layer which is calling back to this visit function.
320  /// @param normalizationDescriptor - NormalizationDescriptor to configure the normalization.
321  /// @param name - Optional name for the layer.
322  virtual void VisitNormalizationLayer(const IConnectableLayer* layer,
323  const NormalizationDescriptor& normalizationDescriptor,
324  const char* name = nullptr) = 0;
325 
326  /// Function an output layer should call back to when its Accept(ILayerVisitor&) function is invoked.
327  /// @param layer - pointer to the layer which is calling back to this visit function.
328  /// @param id - User generated id to uniquely identify a particular output. The same id needs to be specified
329  /// when passing the outputs to the IRuntime::EnqueueWorkload() function.
330  /// @param name - Optional name for the layer.
331  virtual void VisitOutputLayer(const IConnectableLayer* layer,
332  LayerBindingId id,
333  const char* name = nullptr) = 0;
334 
335  /// Function a pad layer should call back to when its Accept(ILayerVisitor&) function is invoked.
336  /// @param layer - pointer to the layer which is calling back to this visit function.
337  /// @param paddings - n by 2 tensor, where n is the rank of the input tensor,
338  /// such that paddings[i,0] indicates the amount of padding to add in front of dimension i, and
339  /// paddings[i,1] indicates the amount of padding to add after the end of dimension i
340  /// @param name - Optional name for the layer.
341  virtual void VisitPadLayer(const IConnectableLayer* layer,
342  const PadDescriptor& padDescriptor,
343  const char* name = nullptr) = 0;
344 
345  /// Function that a permute layer should call back to when its Accept(ILayerVisitor&) function is invoked.
346  /// @param layer - pointer to the layer which is calling back to this visit function.
347  /// @param permuteDescriptor - PermuteDescriptor to configure the permute.
348  /// @param name - Optional name for the layer.
349  virtual void VisitPermuteLayer(const IConnectableLayer* layer,
350  const PermuteDescriptor& permuteDescriptor,
351  const char* name = nullptr) = 0;
352 
353  /// Function that a pooling layer should call back to when its Accept(ILayerVisitor&) function is invoked.
354  /// @param layer - pointer to the layer which is calling back to this visit function.
355  /// @param pooling2dDescriptor - Pooling2dDescriptor to configure the pooling.
356  /// @param name - Optional name for the layer.
357  virtual void VisitPooling2dLayer(const IConnectableLayer* layer,
358  const Pooling2dDescriptor& pooling2dDescriptor,
359  const char* name = nullptr) = 0;
360 
361  /// Function that a pooling layer should call back to when its Accept(ILayerVisitor&) function is invoked.
362  /// @param layer - pointer to the layer which is calling back to this visit function.
363  /// @param pooling3dDescriptor - Pooling3dDescriptor to configure the pooling.
364  /// @param name - Optional name for the layer.
365  virtual void VisitPooling3dLayer(const IConnectableLayer* layer,
366  const Pooling3dDescriptor& pooling3dDescriptor,
367  const char* name = nullptr) = 0;
368 
369  /// Function that a PReLU activation layer should call back to when its Accept(ILayerVisitor&) function is invoked.
370  /// @param layer - pointer to the layer which is calling back to this visit function.
371  /// @param name - Optional name for the layer.
372  virtual void VisitPreluLayer(const IConnectableLayer* layer,
373  const char* name = nullptr) = 0;
374 
375  /// Function a quantize layer should call back to when its Accept(ILayerVisitor&) function is invoked.
376  /// @param layer - pointer to the layer which is calling back to this visit function.
377  /// @param name - Optional name for the layer.
378  virtual void VisitQuantizeLayer(const IConnectableLayer* layer,
379  const char* name = nullptr) = 0;
380 
381  /// Function a QLstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.
382  /// @param layer - pointer to the layer which is calling back to this visit function.
383  /// @param descriptor - Parameters controlling the operation of the QLstm operation.
384  /// @param params - The weights and biases for the layer
385  /// @param name - Optional name for the layer.
386  virtual void VisitQLstmLayer(const IConnectableLayer* layer,
387  const QLstmDescriptor& descriptor,
388  const LstmInputParams& params,
389  const char* name = nullptr) = 0;
390 
391  /// Function a QuantizedLstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.
392  /// @param layer - pointer to the layer which is calling back to this visit function.
393  /// @param params - The weights and biases for the Quantized LSTM cell
394  /// @param name - Optional name for the layer.
395  virtual void VisitQuantizedLstmLayer(const IConnectableLayer* layer,
396  const QuantizedLstmInputParams& params,
397  const char* name = nullptr) = 0;
398 
399  /// Function a rank layer should call back to when its Accept(ILayerVisitor&) function is invoked.
400  /// @param layer - pointer to the layer which is calling back to this visit function.
401  /// @param name - Optional name for the layer.
402  virtual void VisitRankLayer(const IConnectableLayer* layer,
403  const char* name = nullptr) = 0;
404 
405  /// Function that a reduce layer should call back to when its Accept(ILayerVisitor&) function is invoked.
406  /// @param layer - pointer to the layer which is calling back to this visit function.
407  /// @param reduceDescriptor - Parameters for the reduce operation.
408  /// @param name - Optional name for the layer.
409  virtual void VisitReduceLayer(const IConnectableLayer* layer,
410  const ReduceDescriptor& reduceDescriptor,
411  const char* name = nullptr) = 0;
412 
413  /// Function a reshape layer should call back to when its Accept(ILayerVisitor&) function is invoked.
414  /// @param layer - pointer to the layer which is calling back to this visit function.
415  /// @param reshapeDescriptor - Parameters for the reshape operation.
416  /// @param name - Optional name for the layer.
417  virtual void VisitReshapeLayer(const IConnectableLayer* layer,
418  const ReshapeDescriptor& reshapeDescriptor,
419  const char* name = nullptr) = 0;
420 
421  /// Function that a resize layer should call back to when its Accept(ILayerVisitor&) function is invoked.
422  /// @param layer - pointer to the layer which is calling back to this visit function.
423  /// @param resizeDescriptor - Parameters for the resize operation.
424  /// @param name - Optional name for the layer.
425  virtual void VisitResizeLayer(const IConnectableLayer* layer,
426  const ResizeDescriptor& resizeDescriptor,
427  const char* name = nullptr) = 0;
428 
429  /// Function that a slice layer should call back to when its Accept(ILayerVisitor&) function is invoked.
430  /// @param layer - pointer to the layer which is calling back to this visit function.
431  /// @param sliceDescriptor - SliceDescriptor to configure the slice operation.
432  /// @param name - Optional name for the layer.
433  virtual void VisitSliceLayer(const IConnectableLayer* layer,
434  const SliceDescriptor& sliceDescriptor,
435  const char* name = nullptr) = 0;
436 
437 
438  /// Function that a softmax layer should call back to when its Accept(ILayerVisitor&) function is invoked.
439  /// @param layer - pointer to the layer which is calling back to this visit function.
440  /// @param softmaxDescriptor - SoftmaxDescriptor to configure the softmax.
441  /// @param name - Optional name for the layer.
442  virtual void VisitSoftmaxLayer(const IConnectableLayer* layer,
443  const SoftmaxDescriptor& softmaxDescriptor,
444  const char* name = nullptr) = 0;
445 
446  /// Function a space to batch layer should call back to when its Accept(ILayerVisitor&) function is invoked.
447  /// @param layer - pointer to the layer which is calling back to this visit function.
448  /// @param spaceToBatchNdDescriptor - Parameters for the space to batch operation.
449  /// @param name - Optional name for the layer.
450  virtual void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
451  const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
452  const char* name = nullptr) = 0;
453 
454  /// Function a space to depth layer should call back to when its Accept(ILayerVisitor&) function is invoked.
455  /// @param layer - pointer to the layer which is calling back to this visit function.
456  /// @param spaceToDepthDescriptor - Parameters for the space to depth operation.
457  /// @param name - Optional name for the layer.
458  virtual void VisitSpaceToDepthLayer(const IConnectableLayer* layer,
459  const SpaceToDepthDescriptor& spaceToDepthDescriptor,
460  const char* name = nullptr) = 0;
461 
462  /// Function that a splitter layer should call back to when its Accept(ILayerVisitor&) function is invoked.
463  /// @param layer - pointer to the layer which is calling back to this visit function.
464  /// @param splitterDescriptor - ViewsDescriptor to configure the splitting process.
465  /// Number of Views must be equal to the number of outputs,
466  /// and their order must match - e.g. first view corresponds to
467  /// the first output, second view to the second output, etc....
468  /// @param name - Optional name for the layer.
469  virtual void VisitSplitterLayer(const IConnectableLayer* layer,
470  const ViewsDescriptor& splitterDescriptor,
471  const char* name = nullptr) = 0;
472 
473  /// Function a stack layer should call back to when its Accept(ILayerVisitor&) function is invoked.
474  /// @param layer - pointer to the layer which is calling back to this visit function.
475  /// @param stackDescriptor - Parameters for the stack operation.
476  /// @param name - Optional name for the layer.
477  virtual void VisitStackLayer(const IConnectableLayer* layer,
478  const StackDescriptor& stackDescriptor,
479  const char* name = nullptr) = 0;
480 
481  /// Function a StandInLayer should call back to when its Accept(ILayerVisitor&) function is invoked.
482  /// @param layer - pointer to the layer which is calling back to this visit function.
483  /// @param standInDescriptor - Parameters for the stand-in layer.
484  /// @param name - Optional name for the layer.
485  virtual void VisitStandInLayer(const IConnectableLayer* layer,
486  const StandInDescriptor& standInDescriptor,
487  const char* name = nullptr) = 0;
488 
489  /// Function a strided slice layer should call back to when its Accept(ILayerVisitor&) function is invoked.
490  /// @param layer - pointer to the layer which is calling back to this visit function.
491  /// @param stridedSliceDescriptor - Parameters for the strided slice operation.
492  /// @param name - Optional name for the layer.
493  virtual void VisitStridedSliceLayer(const IConnectableLayer* layer,
494  const StridedSliceDescriptor& stridedSliceDescriptor,
495  const char* name = nullptr) = 0;
496 
497  /// Function a subtraction layer should call back to when its Accept(ILayerVisitor&) function is invoked.
498  /// @param layer - pointer to the layer which is calling back to this visit function.
499  /// @param name - Optional name for the layer.
500  virtual void VisitSubtractionLayer(const IConnectableLayer* layer,
501  const char* name = nullptr) = 0;
502 
503  /// Function a switch layer should call back to when its Accept(ILayerVisitor&) function is invoked.
504  /// @param layer - pointer to the layer which is calling back to this visit function.
505  /// @param name - Optional name for the layer.
506  virtual void VisitSwitchLayer(const IConnectableLayer* layer,
507  const char* name = nullptr) = 0;
508 
509  /// Function that a 2D transpose convolution layer should call back to when its Accept(ILayerVisitor&)
510  /// function is invoked.
511  /// @param layer - pointer to the layer which is calling back to this visit function.
512  /// @param descriptor - Description of the 2D transpose convolution layer.
513  /// @param weights - Tensor for the weights data.
514  /// @param biases - Optional tensor for the bias data.
515  /// @param name - Optional name for the layer.
516  virtual void VisitTransposeConvolution2dLayer(const IConnectableLayer* layer,
517  const TransposeConvolution2dDescriptor& descriptor,
518  const ConstTensor& weights,
519  const Optional<ConstTensor>& biases,
520  const char* name = nullptr) = 0;
521 
522  /// Function that a transpose layer should call back to when its Accept(ILayerVisitor&) function is invoked.
523  /// @param layer - pointer to the layer which is calling back to this visit function.
524  /// @param transposeDescriptor - TransposeDescriptor to configure the transpose.
525  /// @param name - Optional name for the layer.
526  virtual void VisitTransposeLayer(const IConnectableLayer* layer,
527  const TransposeDescriptor& transposeDescriptor,
528  const char* name = nullptr) = 0;
529 
530  virtual void StartVisit() {}
531  virtual void FinishVisit() {}
532 
533 };
const armnnSerializer::Pooling2dDescriptor * Pooling2dDescriptor
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:290
#define ARMNN_DEPRECATED_MSG_REMOVAL_DATE(message, removed_in_release)
Definition: Deprecated.hpp:44
SoftmaxDescriptor LogSoftmaxDescriptor
A LogSoftmaxDescriptor for the LogSoftmaxLayer.
#define ARMNN_DEPRECATED_MSG(message)
Definition: Deprecated.hpp:43
SpaceToDepthDescriptor DepthToSpaceDescriptor
A DepthToSpaceDescriptor for the DepthToSpaceLayer.
const armnnSerializer::Pooling3dDescriptor * Pooling3dDescriptor
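As a rough usage sketch for this (deprecated) interface: the class below assumes the convenience base armnn::LayerVisitorBase<VisitorNoThrowPolicy> from armnn/LayerVisitorBase.hpp, which default-implements every pure virtual Visit function so that only the callbacks of interest need overriding. Whether INetwork::Accept(ILayerVisitor&) is still present depends on the release; new code should implement the ABI stable IStrategy instead.

#include <armnn/Deprecated.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/LayerVisitorBase.hpp> // assumed helper base providing no-op defaults for all Visit functions

ARMNN_NO_DEPRECATE_WARN_BEGIN
// Counts activation layers; every other layer type falls through to the no-op default.
class ActivationCounter : public armnn::LayerVisitorBase<armnn::VisitorNoThrowPolicy>
{
public:
    void VisitActivationLayer(const armnn::IConnectableLayer* /*layer*/,
                              const armnn::ActivationDescriptor& /*activationDescriptor*/,
                              const char* /*name*/ = nullptr) override
    {
        ++m_Count;
    }
    unsigned int m_Count = 0;
};
ARMNN_NO_DEPRECATE_WARN_END

// Usage (assuming 'network' is an INetworkPtr and Accept() has not yet been removed):
//   ActivationCounter counter;
//   network->Accept(counter);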

◆ ARMNN_DEPRECATED_MSG_REMOVAL_DATE() [2/2]

ARMNN_NO_DEPRECATE_WARN_BEGIN struct armnn::ARMNN_DEPRECATED_MSG_REMOVAL_DATE ( "ResizeBilinearQueueDescriptor is deprecated use ResizeQueueDescriptor instead"  ,
"22.08"   
)

◆ AssertNumberOfInputSlots()

void armnn::AssertNumberOfInputSlots ( Layer &  layer)

Definition at line 28 of file Layer.cpp.

References ARMNN_ASSERT, Convolution2d, DepthwiseConvolution2d, FullyConnected, Layer::GetNumInputSlots(), and Layer::GetType().

Referenced by InputSlot::Insert().

29 {
30  switch (layer.GetType())
31  {
32  case LayerType::Convolution2d:
33  case LayerType::DepthwiseConvolution2d:
34  case LayerType::FullyConnected:
35  {
36  ARMNN_ASSERT(layer.GetNumInputSlots() == 2 ||
37  layer.GetNumInputSlots() == 3);
38  break;
39  }
40  default:
41  {
42  ARMNN_ASSERT(layer.GetNumInputSlots() == 1);
43  break;
44  }
45  }
46 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.
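The 2-or-3 slot assertion above reflects the newer API in which weights and bias reach these layer types as ConstantLayer inputs rather than embedded ConstTensors. A hedged sketch, assuming the descriptor-only AddConvolution2dLayer overload for constant-weight inputs:

#include <armnn/ArmNN.hpp>
#include <vector>

int main()
{
    using namespace armnn;
    INetworkPtr network = INetwork::Create();

    Convolution2dDescriptor convDesc;   // default-constructed; configure strides/padding as needed
    convDesc.m_BiasEnabled = false;

    // A dummy 1x1x1x1 constant weights tensor, fed in through input slot 1.
    TensorInfo weightsInfo({1, 1, 1, 1}, DataType::Float32, 0.0f, 0, true);
    std::vector<float> weightsData = {1.0f};
    ConstTensor weightsTensor(weightsInfo, weightsData);

    IConnectableLayer* conv    = network->AddConvolution2dLayer(convDesc, "conv");
    IConnectableLayer* weights = network->AddConstantLayer(weightsTensor, "weights");
    weights->GetOutputSlot(0).Connect(conv->GetInputSlot(1)); // slot 0 carries the activation data

    // With m_BiasEnabled = true a third ConstantLayer would connect to slot 2,
    // giving the 2 or 3 input slots that AssertNumberOfInputSlots() checks.
    return 0;
}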

◆ AssignBackends() [1/3]

OptimizationResult AssignBackends ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
Graph::Iterator &  firstLayer,
Graph::Iterator &  lastLayer,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1018 of file Network.cpp.

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), BackendSettings::GetAvailablePreferredBackends(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.

Referenced by ApplyBackendOptimizations(), AssignBackends(), Optimize(), and TEST_SUITE().

1023 {
1024  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1025  OptimizationResult result;
1026 
1027  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1028  if (availablePreferredBackends.empty())
1029  {
1030  std::stringstream failureMsg;
1031  failureMsg << "No preferred backends are available";
1032  ReportError(failureMsg.str(), errMessages);
1033 
1034  result.m_Error = true;
1035  return result;
1036  }
1037 
1038  for (auto it = firstLayer; it != lastLayer; ++it)
1039  {
1040  AssignBackendsIConnectable(optNetObjPtr,
1041  *it,
1042  errMessages,
1043  result,
1044  backendSettings,
1045  availablePreferredBackends);
1046  }
1047 
1048  for (auto it = firstLayer; it != lastLayer; ++it)
1049  {
1050  auto layer = PolymorphicDowncast<Layer*>(*it);
1051 
1052  if(layer->GetType() == LayerType::Input)
1053  {
1054  BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1055  layer->SetBackendId(connectedBackendId);
1056  }
1057  }
1058 
1059  return result;
1060 }
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
void AssignBackendsIConnectable(OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > &> errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
Definition: Network.cpp:905

◆ AssignBackends() [2/3]

OptimizationResult AssignBackends ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
SubgraphView::IConnectableLayerIterator &  firstLayer,
SubgraphView::IConnectableLayerIterator &  lastLayer,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1062 of file Network.cpp.

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), BackendSettings::GetAvailablePreferredBackends(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.

1067 {
1068  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1069  OptimizationResult result;
1070 
1071  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1072  if (availablePreferredBackends.empty())
1073  {
1074  std::stringstream failureMsg;
1075  failureMsg << "No preferred backends are available";
1076  ReportError(failureMsg.str(), errMessages);
1077 
1078  result.m_Error = true;
1079  return result;
1080  }
1081 
1082  for (auto it = firstLayer; it != lastLayer; ++it)
1083  {
1084  AssignBackendsIConnectable(optNetObjPtr,
1085  *it,
1086  errMessages,
1087  result,
1088  backendSettings,
1089  availablePreferredBackends);
1090  }
1091 
1092  for (auto it = firstLayer; it != lastLayer; ++it)
1093  {
1094  auto layer = PolymorphicDowncast<Layer*>(*it);
1095 
1096  if(layer->GetType() == LayerType::Input)
1097  {
1098  BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1099  layer->SetBackendId(connectedBackendId);
1100  }
1101  }
1102 
1103  return result;
1104 }
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
void AssignBackendsIConnectable(OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > &> errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
Definition: Network.cpp:905

◆ AssignBackends() [3/3]

OptimizationResult armnn::AssignBackends ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
SubgraphView &  subgraph,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1106 of file Network.cpp.

References AssignBackends(), SubgraphView::beginIConnectable(), and SubgraphView::endIConnectable().

1110 {
1111  SubgraphView::IConnectableLayerIterator firstLayer = subgraph.beginIConnectable();
1112  SubgraphView::IConnectableLayerIterator lastLayer = subgraph.endIConnectable();
1113  return AssignBackends(optNetObjPtr,
1114  backendSettings,
1115  firstLayer,
1116  lastLayer,
1117  errMessages);
1118 }
OptimizationResult AssignBackends(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1106

◆ AssignBackendsIConnectable()

void armnn::AssignBackendsIConnectable ( OptimizedNetworkImpl *  optNetObjPtr,
IConnectableLayer *  it,
Optional< std::vector< std::string > &>  errMessages,
OptimizationResult &  result,
BackendSettings &  backendSettings,
std::vector< BackendId > &  availablePreferredBackends 
)

Definition at line 905 of file Network.cpp.

References ARMNN_ASSERT_MSG, AttemptBackendAssignment(), CheckScaleSetOnQuantizedType(), Constant, CpuRef, Float32, OptimizedNetworkImpl::GetGraph(), Input, BackendSettings::IsBackendSupported(), BackendSettings::IsCpuRefUsed(), OptimizationResult::IsError(), OptimizationResult::IsOk(), OptimizationResult::IsWarningOnly(), OptimizationResult::m_Error, BackendSettings::m_SelectedBackends, MemCopy, Permute, and ReturnWithError().

Referenced by AssignBackends().

911 {
912  auto ReturnError = [&](const Layer* layer)
913  {
914  return ReturnWithError(result, layer, backendSettings, errMessages);
915  };
916 
917  auto layer = PolymorphicDowncast<Layer*>(it);
918 
919  if (layer->GetType() == LayerType::Input)
920  {
921  return;
922  }
923 
924  DataType dataTypeIn = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
925  layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
926  DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
927  layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
928 
929  std::string reasonIfUnsupported;
930  bool found = false;
931  if (!CheckScaleSetOnQuantizedType(layer, errMessages))
932  {
933  // don't bomb immediately, find all the quantized outputs
934  // which haven't had a scale set and report them all back.
935  result.m_Error = true;
936  }
937 
938  // First try assign layer to hint backend
939  if (layer->GetBackendHint().has_value() &&
940  backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
941  AttemptBackendAssignment(backendSettings,
942  optNetObjPtr->GetGraph(),
943  layer,
944  layer->GetBackendHint().value(),
945  dataTypeIn,
946  dataTypeOut,
947  availablePreferredBackends,
948  reasonIfUnsupported,
949  errMessages).IsOk())
950  {
951  found = true;
952  backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
953  }
954  else
955  {
956  // Try assign layer to prefered list of backends
957  for (const auto& backend : availablePreferredBackends)
958  {
959  if (layer->GetBackendHint().has_value() &&
960  layer->GetBackendHint().value() == backend)
961  {
962  continue; //Don't re-test the backend hint
963  }
964 
965  OptimizationResult res = AttemptBackendAssignment(backendSettings,
966  optNetObjPtr->GetGraph(),
967  layer,
968  backend,
969  dataTypeIn,
970  dataTypeOut,
971  availablePreferredBackends,
972  reasonIfUnsupported,
973  errMessages);
974 
975  if (res.IsOk())
976  {
977  found = true;
978  backendSettings.m_SelectedBackends.insert(backend);
979  break;
980  }
981  else if (res.IsError())
982  {
983  result = res; // Cannot continue.
984  // Note: we don't need to log the error as it would already
985  // be logged in AttemptBackendAssignment().
986  }
987  else
988  {
989  ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
990  }
991  }
992  }
993 
994  // If the layer is unsupported by any devices, log and return a null network.
995  if (!found)
996  {
997  // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
998  // fallback we should set the compute device on the layer to CpuRef (these are not
999  // available as accelerated operations, or are only available under certain
1000  // conditions, currently they comprise MemCopy, Constant, Permute)
1001  armnn::LayerType layerType = layer->GetType();
1002  if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
1003  layerType == armnn::LayerType::Constant ||
1004  layerType == armnn::LayerType::Permute))
1005  {
1006  BackendId cpuBackendId(armnn::Compute::CpuRef);
1007  layer->SetBackendId(cpuBackendId);
1008  backendSettings.m_SelectedBackends.insert(cpuBackendId);
1009  }
1010  else
1011  {
1012  result = ReturnError(layer);
1013  }
1014  }
1015 
1016 }
CPU Execution: Reference C++ kernels.
OptimizationResult ReturnWithError(OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:580
DataType
Definition: Types.hpp:48
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
bool CheckScaleSetOnQuantizedType(Layer *layer, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:595
OptimizationResult AttemptBackendAssignment(BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:654
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ AssignSplitId()

void armnn::AssignSplitId ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo 
)

Definition at line 309 of file SubgraphViewSelector.cpp.

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

310 {
311  // Check each input to see if we can attach ourselves to any of the subgraphs that have already been assigned.
312  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
313  {
314  // We can only attach ourselves to the subgraph from this input if there isn't a cut here.
315  if (layerInfo.m_IsSelected == parentInfo.m_IsSelected)
316  {
317  // We also need to check that merging into this subgraph won't cause a dependency cycle between subgraphs.
318  // This will be the case if the subgraph that we will become part of is already a dependency
319  // of one of the subgraphs that are input to this layer, e.g:
320  //
321  // 0 | The numbers (0, 1) are the subgraph IDs of each layer and we are looking at layer X.
322  // / \ |
323  // 1 0 | We can't merge X into subgraph 0, because the left-hand input already depends on subgraph 0.
324  // \ / | We can however merge X into subgraph 1.
325  // X |
326  //
327  bool dependenciesOk = true;
328  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& otherParentInfo)
329  {
330  // We call HasAntecedent() ~ n^2 times, where n is the number of inputs to this layer.
331  // Hence it is important that this is efficient - see PartialSubgraph class description.
332  if (otherParentInfo.m_Subgraph->HasAntecedent(parentInfo.m_Subgraph.get()))
333  {
334  dependenciesOk = false;
335  }
336  });
337 
338  if (dependenciesOk)
339  {
340  // Merge into the subgraph of this input. If we have already been merged into another subgraph
341  // (from another input of this layer), then merge both of them together.
342  if (layerInfo.m_Subgraph == nullptr)
343  {
344  layerInfo.m_Subgraph = parentInfo.m_Subgraph;
345  }
346  else
347  {
348  // We call MergeWith() ~ n times, where n is the number of inputs to this layer.
349  // Therefore it does not need to be as performant as HasAntecedent().
350  layerInfo.m_Subgraph->MergeWith(parentInfo.m_Subgraph.get());
351  }
352  }
353  }
354  });
355 
356  // If we weren't able to merge into an existing subgraph then we need to make a new one
357  if (layerInfo.m_Subgraph == nullptr)
358  {
359  layerInfo.m_Subgraph = std::make_shared<PartialSubgraph>();
360  }
361 
362  // Record dependencies of the chosen subgraph based on the inputs of this layer.
363  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
364  {
365  // These functions are called ~n times, where n is the number of inputs to this layer.
366  // Therefore it does not need to be as performant as HasAntecedent().
367  if (!layerInfo.m_Subgraph->IsMergedWith(parentInfo.m_Subgraph.get()))
368  {
369  layerInfo.m_Subgraph->AddDirectAntecedent(parentInfo.m_Subgraph.get());
370  }
371  });
372 }
void ForEachLayerInput(LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)

◆ AttemptBackendAssignment()

OptimizationResult armnn::AttemptBackendAssignment ( BackendSettings &  backendSettings,
Graph &  graph,
Layer *  layer,
BackendId  backend,
DataType  dataTypeIn,
DataType  dataTypeOut,
const std::vector< BackendId > &  availablePreferredBackends,
std::string &  reasonIfUnsupported,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 654 of file Network.cpp.

References BFloat16, Constant, ConvertBf16ToFp32, FloatingPointConverter::ConvertFloat16To32(), ConvertFp16ToFp32, ConvertFp32ToBf16, ConvertFp32ToFp16, Convolution2d, Float16, Float32, FullyConnected, BackendId::Get(), Layer::GetBackendId(), GetDataTypeName(), Layer::GetInputSlots(), GetLayerTypeAsCString(), Layer::GetOutputSlot(), Layer::GetType(), info, InsertConvertBf16ToFp32LayersBefore(), InsertConvertFp16ToFp32LayersBefore(), InsertConvertFp32ToBf16LayersAfter(), InsertConvertFp32ToFp16LayersAfter(), IWorkloadFactory::IsLayerSupported(), ConstantLayer::m_LayerOutput, ReportWarning(), ReturnWithError(), Layer::SetBackendId(), and OutputSlot::SetTensorInfo().

Referenced by AssignBackendsIConnectable().

663 {
664  OptimizationResult result;
665 
666  // Helper lambda to compose meaningful error message before returning with error
667  auto ReturnError = [&](const Layer* layer)
668  {
669  return ReturnWithError(result, layer, backendSettings, errMessages);
670  };
671 
672  // need to set the compute device on the layer
673  // before we can check if it is supported
674  layer->SetBackendId(backend);
675  if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
676  {
677  if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
678  {
679  if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
680  && layer->GetType() != LayerType::ConvertFp32ToFp16
681  && layer->GetType() != LayerType::ConvertFp16ToFp32)
682  {
683  auto ConstantLayerFromFp16ToFp32 = [](Layer& layer)
684  {
685  if (layer.GetType() == LayerType::Constant)
686  {
687  ConstantLayer* constantLayer = PolymorphicDowncast<ConstantLayer*>(&layer);
688 
689  auto& info = constantLayer->m_LayerOutput->GetTensorInfo();
690 
691  if (info.GetDataType() == DataType::Float16)
692  {
693  std::vector<float> newValues(info.GetNumElements());
694 
695  FloatingPointConverter::ConvertFloat16To32(
696  constantLayer->m_LayerOutput->GetConstTensor<Half>(),
697  info.GetNumElements(),
698  newValues.data());
699 
700  TensorInfo newInfo(info);
701  newInfo.SetDataType(DataType::Float32);
702  ConstTensor newInput(newInfo, newValues);
703  constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
704 
705  layer.GetOutputSlot(0).SetTensorInfo(newInfo);
706  }
707  }
708  };
709 
710  bool checkType = false;
711 
712  for (auto inputSlot : layer->GetInputSlots())
713  {
714  auto connectedOutputSlot = inputSlot.GetConnectedOutputSlot();
715  if (connectedOutputSlot->GetOwningLayer().GetType() == LayerType::Constant)
716  {
717  if (connectedOutputSlot->GetNumConnections() == 1)
718  {
719  checkType = true;
720  ConstantLayerFromFp16ToFp32(connectedOutputSlot->GetOwningLayer());
721  }
722  }
723  }
724 
725  // Insert FP16 -> FP32 conversion layer before current layer
726  std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
727  if (dataTypeIn == DataType::Float16)
728  {
729  convertFp16ToFp32Layers =
730  InsertConvertFp16ToFp32LayersBefore(graph, *layer, checkType);
731  }
732 
733  // Insert FP32 -> FP16 conversion layer after current layer
734  std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
735  if (dataTypeOut == DataType::Float16)
736  {
737  convertFp32ToFp16Layers =
738  InsertConvertFp32ToFp16LayersAfter(graph, *layer);
739  }
740 
741  // Assign a supported backend to the newly introduced conversion layers
742  auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
743  {
744  bool supportedBackendFound = false;
745  std::string reasonIfUnsupported;
746 
747  // Try preferred backend first
748  layer->SetBackendId(preferredBackend);
749  if (IWorkloadFactory::IsLayerSupported(*layer,
750  EmptyOptional(),
751  reasonIfUnsupported))
752  {
753  supportedBackendFound = true;
754  }
755  else
756  {
757  for (const auto& backend : availablePreferredBackends)
758  {
759  // Skip preferred backend (we already determined that it is not supported)
760  if (backend == preferredBackend)
761  {
762  continue;
763  }
764 
765  layer->SetBackendId(backend);
766  if (IWorkloadFactory::IsLayerSupported(*layer,
767  EmptyOptional(),
768  reasonIfUnsupported))
769  {
770  supportedBackendFound = true;
771  break;
772  }
773  }
774  }
775 
776  return supportedBackendFound;
777  };
778 
779  for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
780  {
781  if (!AssignFirstSupportedBackend(convertLayer, backend))
782  {
783  return ReturnError(convertLayer);
784  }
785  }
786 
787  for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
788  {
789  if (!AssignFirstSupportedBackend(convertLayer, backend))
790  {
791  return ReturnError(convertLayer);
792  }
793  }
794 
795  return result;
796  }
797  }
798  else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
799  {
800  if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
801  && layer->GetType() != LayerType::ConvertFp32ToBf16
802  && layer->GetType() != LayerType::ConvertBf16ToFp32)
803  {
804  // Insert BF16 -> FP32 conversion layer before current layer
805  std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
806  if (dataTypeIn == DataType::BFloat16)
807  {
808  convertBf16ToFp32Layers =
809  InsertConvertBf16ToFp32LayersBefore(graph, *layer);
810  if (layer->GetType() == LayerType::Convolution2d)
811  {
812  ConvertBf16ToFp32Weight<Convolution2dLayer>(layer);
813  }
814  else if (layer->GetType() == LayerType::FullyConnected)
815  {
816  ConvertBf16ToFp32Weight<FullyConnectedLayer>(layer);
817  }
818  }
819 
820  // Insert FP32 -> BF16 conversion layer after current layer
821  std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
822  if (dataTypeOut == DataType::BFloat16)
823  {
824  convertFp32ToBf16Layers =
825  InsertConvertFp32ToBf16LayersAfter(graph, *layer);
826  }
827 
828  // Assign a supported backend to the newly introduced conversion layers
829  auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
830  {
831  bool supportedBackendFound = false;
832  std::string reasonIfUnsupported;
833 
834  // Try preferred backend first
835  layer->SetBackendId(preferredBackend);
836  if (IWorkloadFactory::IsLayerSupported(*layer,
837  EmptyOptional(),
838  reasonIfUnsupported))
839  {
840  supportedBackendFound = true;
841  }
842  else
843  {
844  for (const auto& backend : availablePreferredBackends)
845  {
846  // Skip preferred backend (we already determined that it is not supported)
847  if (backend == preferredBackend)
848  {
849  continue;
850  }
851 
852  layer->SetBackendId(backend);
853  if (IWorkloadFactory::IsLayerSupported(*layer,
854  EmptyOptional(),
855  reasonIfUnsupported))
856  {
857  supportedBackendFound = true;
858  break;
859  }
860  }
861  }
862 
863  return supportedBackendFound;
864  };
865 
866  for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
867  {
868  if (!AssignFirstSupportedBackend(convertLayer, backend))
869  {
870  return ReturnError(convertLayer);
871  }
872  }
873 
874  for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
875  {
876  if (!AssignFirstSupportedBackend(convertLayer, backend))
877  {
878  return ReturnError(convertLayer);
879  }
880  }
881 
882  return result;
883  }
884  }
885 
886  std::stringstream warningMsg;
887  warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
888  << " is not supported on requested backend " << layer->GetBackendId().Get()
889  << " for input data type " << GetDataTypeName(dataTypeIn)
890  << " and output data type " << GetDataTypeName(dataTypeOut)
891  << " (reason: " << reasonIfUnsupported
892  << "), falling back to the next backend.";
893  ReportWarning(warningMsg.str(), errMessages);
894 
895  return OptimizationResult(true, false);
896  }
897  else
898  {
899  return result;
900  }
901 }
bool IsLayerSupported(const armnn::Layer *layer)
Definition: MockBackend.cpp:60
std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter(Graph &graph, Layer &layer)
std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore(Graph &graph, Layer &layer, bool expectCorrectInputType)
OptimizationResult ReturnWithError(OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:580
constexpr const char * GetDataTypeName(DataType dataType)
Definition: TypesUtils.hpp:202
std::vector< ConvertBf16ToFp32Layer * > InsertConvertBf16ToFp32LayersBefore(Graph &graph, Layer &layer, bool expectCorrectInputType)
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersAfter(Graph &graph, Layer &layer)
void ReportWarning(const std::string &warningMessage, Optional< std::vector< std::string > &> warningMessages)
Definition: Network.cpp:568
half_float::half Half
Definition: Half.hpp:18
const char * GetLayerTypeAsCString(LayerType type)
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.

◆ BackendRegistryInstance()

◆ BatchNormImpl()

void BatchNormImpl ( const BatchNormalizationQueueDescriptor &  data,
Decoder< float > &  meanDecoder,
Decoder< float > &  varianceDecoder,
Decoder< float > &  betaDecoder,
Decoder< float > &  gammaDecoder,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 18 of file BatchNormImpl.cpp.

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), GetTensorInfo(), DataLayoutIndexed::GetWidthIndex(), BatchNormalizationDescriptor::m_DataLayout, BatchNormalizationDescriptor::m_Eps, QueueDescriptor::m_Inputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

Referenced by RefBatchNormalizationWorkload::ExecuteAsync().

25 {
26  const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
27  const TensorShape inputShape = inputInfo.GetShape();
28 
29  armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
30 
31  unsigned int inputBatches = inputShape[0];
32  unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
33  unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
34  unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
35 
36  for (unsigned int c = 0; c < inputChannels; c++)
37  {
38  meanDecoder[c];
39  varianceDecoder[c];
40  betaDecoder[c];
41  gammaDecoder[c];
42  float mean = meanDecoder.Get();
43  float var = varianceDecoder.Get();
44  float beta = betaDecoder.Get();
45  float gamma = gammaDecoder.Get();
46 
47  float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
48  float add = beta - mult * mean;
49 
50  for (unsigned int n = 0; n < inputBatches; n++)
51  {
52  for (unsigned int h = 0; h < inputHeight; h++)
53  {
54  for (unsigned int w = 0; w < inputWidth; w++)
55  {
56  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
57  inputDecoder[index];
58  outputEncoder[index];
59  outputEncoder.Set(mult * inputDecoder.Get() + add);
60  }
61  }
62  }
63  }
64 }
virtual void Set(IType right)=0
virtual IType Get() const =0
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout, const armnn::DataType dataType)
Definition: TensorUtils.cpp:38
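
The per-element work above folds the standard batch-normalization formula y = gamma * (x - mean) / sqrt(var + eps) + beta into a single multiply-add per channel (mult and add). A minimal, self-contained sketch of that arithmetic with illustrative values and none of the Decoder/Encoder plumbing:

#include <cmath>
#include <cstdio>

int main()
{
    // Illustrative per-channel statistics and parameters.
    const float mean  = 0.5f;
    const float var   = 4.0f;
    const float beta  = 1.0f;   // learned offset
    const float gamma = 2.0f;   // learned scale
    const float eps   = 1e-5f;  // m_Eps in the descriptor

    // Same folding as BatchNormImpl: y = mult * x + add.
    const float mult = gamma / std::sqrt(var + eps);
    const float add  = beta - mult * mean;

    const float x = 3.0f;
    const float y = mult * x + add;   // equals gamma * (x - mean) / sqrt(var + eps) + beta

    std::printf("normalized value: %f\n", y);
    return 0;
}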

◆ BatchToSpaceNd()

void BatchToSpaceNd ( const DataLayoutIndexed &  dataLayout,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
const std::vector< unsigned int > &  blockShape,
const std::vector< std::pair< unsigned int, unsigned int >> &  cropsData,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 35 of file BatchToSpaceNd.cpp.

References ARMNN_ASSERT_MSG, BatchToSpaceNd(), Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumDimensions(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Offset(), and Encoder< IType >::Set().

Referenced by BatchToSpaceNd(), BatchToSpaceNdLayer::BatchToSpaceNdLayer(), and TEST_SUITE().

42 {
43  TensorShape inputShape = inputTensorInfo.GetShape();
44 
45  ARMNN_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Expected Input with 4 Dimensions");
46 
47  TensorShape outputShape = outputTensorInfo.GetShape();
48 
49  ARMNN_ASSERT_MSG(outputShape.GetNumDimensions() == 4, "Expected Output with 4 Dimensions");
50 
51  const unsigned int inputBatchSize = inputShape[0];
52  const unsigned int channels = inputShape[dataLayout.GetChannelsIndex()];
53 
54  const unsigned int outputBatchSize = outputShape[0];
55  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
56  const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
57 
58  ARMNN_ASSERT_MSG(blockShape.size() > 0, "BlockShape must contain 1 or more entries");
59 
60  const unsigned int blockShapeHeight = blockShape[0];
61  const unsigned int blockShapeWidth = blockShape[1];
62 
63  ARMNN_ASSERT_MSG(cropsData.size() > 0, "Crops must contain 1 or more entries");
64 
65  const unsigned int cropsTop = cropsData[0].first;
66  const unsigned int cropsLeft = cropsData[1].first;
67 
68  for (unsigned int inBatch = 0; inBatch < inputBatchSize; ++inBatch)
69  {
70  const unsigned int outBatch = inBatch % outputBatchSize;
71  const unsigned int spatialOffset = inBatch / outputBatchSize;
72 
73  for (unsigned int inH = 0; inH < inputTensorInfo.GetShape()[dataLayout.GetHeightIndex()]; ++inH) {
74  const unsigned int outH = inH * blockShapeHeight + spatialOffset / blockShapeWidth - cropsTop;
75 
76  if (outH >= outputHeight)
77  {
78  continue;
79  }
80 
81  for (unsigned int inW = 0; inW < inputTensorInfo.GetShape()[dataLayout.GetWidthIndex()]; ++inW) {
82  const unsigned int outW = inW * blockShapeWidth + spatialOffset % blockShapeWidth - cropsLeft;
83 
84  if (outW >= outputWidth)
85  {
86  continue;
87  }
88 
89  for (unsigned int c = 0; c < channels; c++)
90  {
91  unsigned int outOffset = Offset(outputShape, outBatch, outH, outW, c, dataLayout);
92  unsigned int inOffset = Offset(inputShape, inBatch, inH, inW, c, dataLayout);
93 
94  outputEncoder[outOffset];
95  inputDecoder[inOffset];
96  outputEncoder.Set(inputDecoder.Get());
97  }
98  }
99  }
100  }
101 }
unsigned int GetWidthIndex() const
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
unsigned int Offset(const TensorShape &shape, unsigned int batch, unsigned int height, unsigned int width, unsigned int channels, const DataLayoutIndexed &dataLayout)
virtual void Set(IType right)=0
unsigned int GetHeightIndex() const
virtual IType Get() const =0
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
unsigned int GetChannelsIndex() const
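
The loop body maps each input coordinate (inBatch, inH, inW) to an output coordinate using the block shape and crops. A small standalone sketch of that index arithmetic, assuming blockShape = {2, 2}, no crops and an input batch of 4 folding into an output batch of 1:

#include <cstdio>

int main()
{
    // Assumed, illustrative parameters.
    const unsigned int blockShapeHeight = 2;
    const unsigned int blockShapeWidth  = 2;
    const unsigned int cropsTop  = 0;
    const unsigned int cropsLeft = 0;
    const unsigned int outputBatchSize = 1;   // inputBatchSize (4) / (2 * 2)

    // One example input coordinate.
    const unsigned int inBatch = 3, inH = 0, inW = 0;

    // Same mapping as the loop body above.
    const unsigned int outBatch      = inBatch % outputBatchSize;                                    // 0
    const unsigned int spatialOffset = inBatch / outputBatchSize;                                    // 3
    const unsigned int outH = inH * blockShapeHeight + spatialOffset / blockShapeWidth - cropsTop;   // 1
    const unsigned int outW = inW * blockShapeWidth  + spatialOffset % blockShapeWidth - cropsLeft;  // 1

    std::printf("(b=%u, h=%u, w=%u) -> (b=%u, h=%u, w=%u)\n", inBatch, inH, inW, outBatch, outH, outW);
    return 0;
}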

◆ CalcLevel()

int armnn::CalcLevel ( const Event *  eventPtr )

Definition at line 246 of file Profiling.cpp.

References Event::GetParentEvent().

Referenced by ProfilerImpl::AnalyzeEventsAndWriteResults(), and ProfilerImpl::PopulateParent().

247 {
248  int level = 0;
249  while (eventPtr != nullptr)
250  {
251  eventPtr = eventPtr->GetParentEvent();
252  level++;
253  }
254  return level;
255 }

◆ CalculateEdgeStrategy()

EdgeStrategy armnn::CalculateEdgeStrategy ( BackendsMap &  backends,
ITensorHandleFactory::FactoryId  srcFactoryId,
const Layer &  layer,
const Layer &  connectedLayer,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled 
)

Definition at line 1512 of file Network.cpp.

References ARMNN_ASSERT_MSG, CopyToTarget, DirectCompatibility, ExportToTarget, FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, PaddingRequired, ITensorHandleFactory::SupportsMapUnmap(), and Undefined.

Referenced by SelectTensorHandleStrategy().

1518 {
1519  auto toBackend = backends.find(connectedLayer.GetBackendId());
1520  ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1521 
1522  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1523 
1524  // Legacy API check for backward compatibility
1525  if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
1526  {
1527  if (layer.GetBackendId() != connectedLayer.GetBackendId())
1528  {
1529  return EdgeStrategy::CopyToTarget;
1530  }
1531  else
1532  {
1533  return EdgeStrategy::DirectCompatibility;
1534  }
1535  }
1536 
1537  // TensorHandleFactory API present, so perform more sophisticated strategies.
1538  // Dst Output layers don't require copy because they use import or map/unmap
1539  if (connectedLayer.GetType() == LayerType::Output)
1540  {
1541  return EdgeStrategy::DirectCompatibility;
1542  }
1543 
1544  // Search for direct match in prefs
1545  for (auto&& pref : dstPrefs)
1546  {
1547  if (pref == srcFactoryId)
1548  {
1549  return EdgeStrategy::DirectCompatibility;
1550  }
1551  }
1552 
1553  // Search for export/import options
1554  ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
1555  if (srcFactory->GetExportFlags() != 0 && importEnabled)
1556  {
1557  for (auto&& pref : dstPrefs)
1558  {
1559  ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1560 
1561  // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
1562  if (!dstFactory) {
1563  continue;
1564  }
1565  if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
1566  {
1567  auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
1568  auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
1569  &connectedLayer,
1570  CapabilityClass::PaddingRequired);
1571  auto srcFallback = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1572  auto dstFallback = dstFactory->GetCapabilities(&connectedLayer,
1573  &connectedLayer,
1574  CapabilityClass::FallbackImportDisabled);
1575  // Do not require memory copy if the source and destination do not require padding.
1576  if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty())
1577  {
1578  return EdgeStrategy::ExportToTarget;
1579  }
1580  }
1581  }
1582  }
1583 
1584  // Search for copy options via map/unmap
1585  if (srcFactory->SupportsMapUnmap())
1586  {
1587  for (auto&& pref : dstPrefs)
1588  {
1589  ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1590  if (dstFactory && dstFactory->SupportsMapUnmap())
1591  {
1592  return EdgeStrategy::CopyToTarget;
1593  }
1594  }
1595  }
1596 
1597  return EdgeStrategy::Undefined;
1598 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ CalculateGatherNdKeyIndices()

std::map< std::string, unsigned int > CalculateGatherNdKeyIndices ( TensorInfo  inputInfo0,
TensorInfo  inputInfo1 
)

Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)

Parameters
inputInfo0 - TensorInfo of the corresponding input tensor: params
inputInfo1 - TensorInfo of the corresponding input tensor: indices
Returns
- A map with names and values for N, ND, K, W, C

Definition at line 300 of file WorkloadUtils.cpp.

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

Referenced by ClGatherNdWorkload::ClGatherNdWorkload(), ClGatherNdWorkloadValidate(), RefGatherNdWorkload::ExecuteAsync(), GatherTensorHandlePairs(), NeonGatherNdWorkload::NeonGatherNdWorkload(), and NeonGatherNdWorkloadValidate().

301 {
302  std::vector<unsigned int> paramsShape;
303  for (unsigned int i = 0; i < inputInfo0.GetNumDimensions(); ++i)
304  {
305  paramsShape.push_back(inputInfo0.GetShape()[i]);
306  }
307 
308  std::vector<unsigned int> indicesShape;
309  for (unsigned int i = 0; i < inputInfo1.GetNumDimensions(); ++i)
310  {
311  indicesShape.push_back(inputInfo1.GetShape()[i]);
312  }
313 
314  std::map<std::string, unsigned int> keyIndices;
315 
316  // N: number of batches
317  keyIndices["N"] = 1;
318 
319  // ND: number of dimensions that are sliced from params
320  keyIndices["ND"] = indicesShape.back();
321 
322  // W: number of indices in each batch (all but the last dimension)
323  keyIndices["W"] =
324  static_cast<unsigned int>(std::accumulate(std::begin(indicesShape),
325  std::end(indicesShape) - 1,
326  1,
327  std::multiplies<>() ));
328  // K: range of each index
329  keyIndices["K"] =
330  static_cast<unsigned int>(std::accumulate(std::begin(paramsShape),
331  std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
332  1,
333  std::multiplies<>() ));
334  // C: number of channels for each index
335  keyIndices["C"] =
336  static_cast<unsigned int>(std::accumulate(std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
337  std::end(paramsShape),
338  1,
339  std::multiplies<>() ));
340 
341  return keyIndices;
342 }
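
As a hedged usage sketch: for a params tensor of shape [5, 4, 3] and an indices tensor of shape [2, 2], the returned map works out to N = 1, ND = 2, W = 2, K = 5 * 4 = 20 and C = 3. The forward declaration below stands in for the backend WorkloadUtils header, whose include path is not shown on this page:

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <cstdio>
#include <map>
#include <string>

namespace armnn
{
// Declared in the backends' WorkloadUtils header, as documented above.
std::map<std::string, unsigned int> CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1);
}

int main()
{
    using namespace armnn;

    TensorInfo params(TensorShape({ 5, 4, 3 }), DataType::Float32);   // "params" input
    TensorInfo indices(TensorShape({ 2, 2 }), DataType::Signed32);    // "indices" input

    std::map<std::string, unsigned int> keys = CalculateGatherNdKeyIndices(params, indices);

    // Expected: N=1 ND=2 W=2 K=20 C=3
    std::printf("N=%u ND=%u W=%u K=%u C=%u\n", keys["N"], keys["ND"], keys["W"], keys["K"], keys["C"]);
    return 0;
}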

◆ CalculateSlotOption()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOption ( BackendsMap &  backends,
OutputSlot &  outputSlot,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled 
)

Definition at line 1362 of file Network.cpp.

References ARMNN_ASSERT_MSG, FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), OutputSlot::GetConnections(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), IBackendInternal::GetHandleFactoryPreferences(), Layer::GetInputSlots(), OutputSlot::GetOwningLayer(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, RequiresCopy(), and ITensorHandleFactory::SupportsMapUnmap().

Referenced by SelectTensorHandleStrategy().

1366 {
1367  // First ensure the from backends can support the TensorHandeAPI
1368  Layer& layer = outputSlot.GetOwningLayer();
1369  auto frmBackend = backends.find(layer.GetBackendId());
1370  if (frmBackend == backends.end() ||
1371  !frmBackend->second->SupportsTensorAllocatorAPI())
1372  {
1373  return ITensorHandleFactory::LegacyFactoryId;
1374  }
1375 
1376  bool outputConnection = false;
1377  for (auto&& connection : outputSlot.GetConnections())
1378  {
1379  const Layer& connectedLayer = connection->GetOwningLayer();
1380  if (connectedLayer.GetType() == LayerType::Output)
1381  {
1382  outputConnection = true;
1383  }
1384  }
1385 
1386  IBackendInternal* srcBackend = frmBackend->second.get();
1387  auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
1388 
1389  // Initialize the scores
1390  std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1391  for (auto&& pref : srcPrefs)
1392  {
1393  if (importEnabled)
1394  {
1395  ITensorHandleFactory* factory = registry.GetFactory(pref);
1396  if (outputConnection)
1397  {
1398  // Check if this is fallback case
1399  bool fallbackConnection = false;
1400  for (auto&& inputSlot : layer.GetInputSlots())
1401  {
1402  if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId())
1403  {
1404  fallbackConnection = true;
1405  }
1406  }
1407  if (fallbackConnection)
1408  {
1409  auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1410  // Cannot use factory import if fallback import is not supported.
1411  if (!factoryCap.empty())
1412  {
1413  continue;
1414  }
1415  }
1416  else if (factory->GetExportFlags() == 0)
1417  {
1418  continue;
1419  }
1420  }
1421  if (!outputConnection)
1422  {
1423  auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1424  // Cannot use factory import if fallback import is not supported.
1425  if (!factoryCap.empty())
1426  {
1427  continue;
1428  }
1429  }
1430 
1431  }
1432  else
1433  {
1434  // Only consider factories that support map/unmap
1435  ITensorHandleFactory* factory = registry.GetFactory(pref);
1436  if (!factory->SupportsMapUnmap())
1437  {
1438  // The current tensor handle factory does not support the map/unmap strategy, move to the next one
1439  continue;
1440  }
1441  }
1442 
1443 
1444  auto it = factoryScores.find(pref);
1445  if (it == factoryScores.end())
1446  {
1447  // Add new score to the table
1448  factoryScores[pref] = 0;
1449  }
1450  }
1451 
1452  // Score each handle factory based on how many times it requires copies on the slot connections
1453  for (auto&& connection : outputSlot.GetConnections())
1454  {
1455  const Layer& connectedLayer = connection->GetOwningLayer();
1456 
1457  auto toBackend = backends.find(connectedLayer.GetBackendId());
1458  ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1459 
1460  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1461  for (auto&& src : srcPrefs)
1462  {
1463  if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
1464  {
1465  continue;
1466  }
1467 
1468  for (auto&& dst : dstPrefs)
1469  {
1470  if (RequiresCopy(src, dst, registry))
1471  {
1472  // Copy avoided, increase the score
1473  factoryScores[src]++;
1474  break;
1475  }
1476  }
1477  }
1478  }
1479 
1480  // Find the lowest score
1481  int minScore = std::numeric_limits<int>::max();
1482  for (auto it : factoryScores)
1483  {
1484  minScore = std::min(minScore, it.second);
1485  }
1486 
1487  // Collect factories matching the best(lowest) score
1488  std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
1489  for (auto it : factoryScores)
1490  {
1491  if (it.second == minScore)
1492  {
1493  optimalFactories.push_back(it.first);
1494  }
1495  }
1496 
1497  // For all compatible Factories matching the best score, find the preferred one for the current layer.
1498  for (auto&& srcPref : srcPrefs)
1499  {
1500  for (auto&& comp : optimalFactories)
1501  {
1502  if (comp == srcPref)
1503  {
1504  return comp;
1505  }
1506  }
1507  }
1508 
1509  return ITensorHandleFactory::LegacyFactoryId;
1510 }
bool RequiresCopy(ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)
Definition: Network.cpp:1247
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ CalculateSlotOptionForInput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForInput ( BackendsMap &  backends,
OutputSlot &  slot,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled 
)

Definition at line 1267 of file Network.cpp.

References ARMNN_ASSERT, ARMNN_ASSERT_MSG, Layer::GetBackendId(), OutputSlot::GetConnections(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), OutputSlot::GetOwningLayer(), Layer::GetType(), Input, ITensorHandleFactory::LegacyFactoryId, and ITensorHandleFactory::SupportsMapUnmap().

Referenced by SelectTensorHandleStrategy().

1271 {
1272  Layer& layer = slot.GetOwningLayer();
1273  ARMNN_ASSERT(layer.GetType() == LayerType::Input);
1274 
1275  // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
1276  // doesn't matter which backend it is assigned to because they all use the same implementation, which
1277  // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
1278  // select a factory with maximum compatibility with the layers connected to the InputLayer.
1279 
1280  // First ensure the from backends can support the TensorHandeAPI
1281  auto frmBackend = backends.find(layer.GetBackendId());
1282  if (frmBackend == backends.end() ||
1283  !frmBackend->second->SupportsTensorAllocatorAPI())
1284  {
1285  return ITensorHandleFactory::LegacyFactoryId;
1286  }
1287 
1288  // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
1289  // fewest copies.
1290  std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1291  int topScore = 0;
1292  ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
1293 
1294  for (auto&& connection : slot.GetConnections())
1295  {
1296 
1297  const Layer& connectedLayer = connection->GetOwningLayer();
1298 
1299  auto toBackend = backends.find(connectedLayer.GetBackendId());
1300  ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1301 
1302  if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
1303  {
1304  // The destination backend does not support the tensor allocator API, move to the next one
1305  continue;
1306  }
1307 
1308  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1309  for (auto&& dst : dstPrefs)
1310  {
1311  // Input layers use the mem copy workload or import, so the selected factory must
1312  // support either the map/unmap API or Import API
1313  ITensorHandleFactory* factory = registry.GetFactory(dst);
1314  if (importEnabled && factory->GetImportFlags() == 0)
1315  {
1316  continue;
1317  }
1318  else if (!importEnabled && !factory->SupportsMapUnmap())
1319  {
1320  continue;
1321  }
1322 
1323  auto it = factoryScores.find(dst);
1324  if (it == factoryScores.end())
1325  {
1326  // Add new score to the table
1327  factoryScores[dst] = 0;
1328  if (topChoice == ITensorHandleFactory::LegacyFactoryId)
1329  {
1330  topChoice = dst;
1331  }
1332  }
1333  else
1334  {
1335  // Increase the score
1336  factoryScores[dst]++;
1337 
1338  // Track the best option
1339  if (factoryScores[dst] > topScore)
1340  {
1341  topScore = factoryScores[dst];
1342  topChoice = dst;
1343  }
1344  }
1345  }
1346  }
1347 
1348  return topChoice;
1349 }
ITensorHandleFactory::FactoryId FactoryId
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ CalculateSlotOptionForOutput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForOutput ( BackendsMap &  backends,
OutputSlot &  slot,
TensorHandleFactoryRegistry &  registry 
)

Definition at line 1352 of file Network.cpp.

References ITensorHandleFactory::DeferredFactoryId, and IgnoreUnused().

Referenced by SelectTensorHandleStrategy().

1355 {
1356  IgnoreUnused(backends, slot, registry);
1357  return ITensorHandleFactory::DeferredFactoryId;
1358 }
void IgnoreUnused(Ts &&...)

◆ ChainReduceLayers()

std::vector<IConnectableLayer*> armnn::ChainReduceLayers ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ReduceDescriptor &  desc 
)

Definition at line 298 of file ArmComputeSubgraphUtils.hpp.

References ARMNN_ASSERT, ComputeReductionTensorShape(), OptimizationViews::GetINetwork(), Layer::GetInputSlot(), Layer::GetOutputSlot(), ReduceDescriptor::m_KeepDims, ReduceDescriptor::m_vAxis, and OutputSlot::SetTensorInfo().

301 {
302  // Vector of new chained layers, used for substitution.
303  std::vector<IConnectableLayer*> layers;
304 
305  // Vector of axes so each layer is reshaped correctly.
306  std::vector<uint32_t> axes;
307  unsigned int recalulatedAxis = 0;
308 
309  for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
310  {
311  // Get TensorInfo from base layer and reduce shape using axis.
312  TensorInfo layerInfo = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
313 
314  axes.emplace_back(desc.m_vAxis[i]);
315 
316  const TensorInfo& reducedTensorInfo = ComputeReductionTensorShape(layerInfo,
317  axes,
318  desc.m_KeepDims);
319 
320  // Create a vector for the single axis to be assigned to the descriptor.
321  // Update axis if keepDims is set reduce layers correctly.
322  std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i] - recalulatedAxis);
323 
324  // Create a descriptor and assign single axis.
325  ReduceDescriptor newReduceDescriptor = baseLayer->GetParameters();
326  newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end());
327 
328  // Add new layer to graph.
329  std::string layerName = "reduce_layer_" + std::to_string(i);
330 
331  Layer* replacementLayer = PolymorphicDowncast<Layer*>(
332  optimizationViews.GetINetwork()->AddReduceLayer(newReduceDescriptor,
333  layerName.c_str()));
334 
335  // Connect previous layer with new layer.
336  // The first and last layer will be connected when the subgraph is replaced.
337  if (!layers.empty())
338  {
339  layers[i - 1]->GetOutputSlot(0).Connect(replacementLayer->GetInputSlot(0));
340  }
341 
342  // Set updated tensorInfo for new layer.
343  replacementLayer->GetOutputSlot(0).SetTensorInfo(reducedTensorInfo);
344 
345  if (!desc.m_KeepDims)
346  {
347  recalulatedAxis++;
348  }
349 
350  layers.emplace_back(replacementLayer);
351  }
352 
353  // Check if the TensorInfo from the last layer equals the inferred output from the original layer.
354  ARMNN_ASSERT(baseLayer->GetOutputSlot(0).GetTensorInfo() ==
355  PolymorphicDowncast<Layer*>(layers.back())->GetOutputSlot().GetTensorInfo());
356 
357  return layers;
358 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
const TensorInfo ComputeReductionTensorShape(const armnn::TensorInfo &input, const std::vector< uint32_t > &vAxis, const bool keepDims)
Function to compute the output tensor shape based on the axes and if keepDims is set.
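
The recalulatedAxis bookkeeping above compensates for dimensions that vanish as each single-axis reduce is chained when keepDims is false. A small standalone sketch of just that axis arithmetic, assuming a 4D input reduced over axes {1, 3}:

#include <cstdio>
#include <vector>

int main()
{
    // Reduce a 4D tensor over axes 1 and 3, dropping the reduced dimensions.
    const std::vector<unsigned int> vAxis = { 1, 3 };
    const bool keepDims = false;

    unsigned int recalculatedAxis = 0;
    for (unsigned int i = 0; i != vAxis.size(); ++i)
    {
        // Each earlier reduce removed one dimension, so shift the axis down.
        const unsigned int singleAxis = vAxis[i] - recalculatedAxis;
        std::printf("reduce_layer_%u reduces axis %u\n", i, singleAxis);   // prints axis 1, then axis 2

        if (!keepDims)
        {
            recalculatedAxis++;
        }
    }
    return 0;
}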

◆ CheckFlag()

bool armnn::CheckFlag ( MemorySourceFlags  flags,
MemorySource  source 
)
inline

Definition at line 41 of file MemorySources.hpp.

Referenced by LoadedNetwork::FreeWorkingMemory(), LoadedNetwork::ImportInputs(), and LoadedNetwork::ImportOutputs().

42 {
43  return (static_cast<MemorySourceFlags>(source) & flags) != 0;
44 }
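
A minimal usage sketch: build a MemorySourceFlags bitmask from two sources and test individual bits with CheckFlag. The armnn/MemorySources.hpp include path is an assumption:

#include <armnn/MemorySources.hpp>
#include <cstdio>

int main()
{
    using namespace armnn;

    // Advertise support for Malloc and DmaBuf imports.
    MemorySourceFlags flags = static_cast<MemorySourceFlags>(MemorySource::Malloc) |
                              static_cast<MemorySourceFlags>(MemorySource::DmaBuf);

    std::printf("Malloc:  %d\n", CheckFlag(flags, MemorySource::Malloc));    // 1
    std::printf("DmaBuf:  %d\n", CheckFlag(flags, MemorySource::DmaBuf));    // 1
    std::printf("Gralloc: %d\n", CheckFlag(flags, MemorySource::Gralloc));   // 0
    return 0;
}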

◆ CheckLayerBindingId()

void armnn::CheckLayerBindingId ( LayerBindingId  visitorId,
LayerBindingId  id 
)

Definition at line 13 of file TestInputOutputLayerVisitor.hpp.

Referenced by TestInputLayerVisitor::ExecuteStrategy(), and TestOutputLayerVisitor::ExecuteStrategy().

14 {
15  CHECK_EQ(visitorId, id);
16 }

◆ CheckScaleSetOnQuantizedType()

bool armnn::CheckScaleSetOnQuantizedType ( Layer *  layer,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 595 of file Network.cpp.

References ARMNN_LOG, TensorInfo::GetDataType(), GetLayerTypeAsCString(), Layer::GetNameStr(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), OutputSlot::GetTensorInfo(), Layer::GetType(), info, QAsymmU8, ReportError(), TensorInfo::SetQuantizationOffset(), TensorInfo::SetQuantizationScale(), OutputSlot::SetTensorInfo(), Softmax, and warning.

Referenced by AssignBackendsIConnectable().

596 {
597  bool noErrors = true;
598  unsigned int numOutputs = layer->GetNumOutputSlots();
599  for (unsigned int i = 0; i < numOutputs; i++) {
600  OutputSlot& outputSlot = layer->GetOutputSlot(i);
601  TensorInfo info = outputSlot.GetTensorInfo();
602  if (DataType::QAsymmU8 == info.GetDataType()) {
603  if (0.f == info.GetQuantizationScale()) {
604  noErrors = false;
605  std::stringstream ss;
606  ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
607  << " (" << layer->GetNameStr() << ") is of type"
608  << " Quantized 8 bit but its scale parameter has not been set";
609  ReportError(ss.str(), errMessages);
610  }
611  // Softmax under QuantisedAsymm8 must always be scale (1.0f/256.0f) and offset 0
612  if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
613  info.GetQuantizationOffset() != 0) &&
614  layer->GetType() == armnn::LayerType::Softmax)
615  {
616  std::stringstream ss;
617  ss << "Quantization parameters for Softmax layer (Scale: " <<
618  info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
619  ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
620  ARMNN_LOG(warning) << ss.str();
621  info.SetQuantizationScale((1.0f /256.0f));
622  info.SetQuantizationOffset(0);
623  outputSlot.SetTensorInfo(info);
624  }
625  }
626  }
627  return noErrors;
628 }
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205
const char * GetLayerTypeAsCString(LayerType type)

◆ CheckSupportRule()

bool armnn::CheckSupportRule ( F  rule,
Optional< std::string &>  reasonIfUnsupported,
const char *  reason 
)

Definition at line 38 of file LayerSupportRules.hpp.

References OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by RefLayerSupport::IsActivationSupported(), RefLayerSupport::IsAdditionSupported(), RefLayerSupport::IsArgMinMaxSupported(), RefLayerSupport::IsBatchNormalizationSupported(), RefLayerSupport::IsBatchToSpaceNdSupported(), RefLayerSupport::IsCastSupported(), RefLayerSupport::IsChannelShuffleSupported(), RefLayerSupport::IsComparisonSupported(), RefLayerSupport::IsConcatSupported(), RefLayerSupport::IsConstantSupported(), RefLayerSupport::IsConvertBf16ToFp32Supported(), RefLayerSupport::IsConvertFp32ToBf16Supported(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDebugSupported(), RefLayerSupport::IsDepthToSpaceSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), RefLayerSupport::IsDequantizeSupported(), RefLayerSupport::IsDetectionPostProcessSupported(), RefLayerSupport::IsDivisionSupported(), RefLayerSupport::IsElementwiseUnarySupported(), RefLayerSupport::IsFakeQuantizationSupported(), RefLayerSupport::IsFillSupported(), RefLayerSupport::IsFloorSupported(), RefLayerSupport::IsFullyConnectedSupported(), RefLayerSupport::IsGatherNdSupported(), RefLayerSupport::IsGatherSupported(), RefLayerSupport::IsInstanceNormalizationSupported(), RefLayerSupport::IsL2NormalizationSupported(), RefLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogSoftmaxSupported(), RefLayerSupport::IsLstmSupported(), RefLayerSupport::IsMaximumSupported(), RefLayerSupport::IsMeanSupported(), RefLayerSupport::IsMemCopySupported(), RefLayerSupport::IsMinimumSupported(), RefLayerSupport::IsMultiplicationSupported(), RefLayerSupport::IsNormalizationSupported(), RefLayerSupport::IsPadSupported(), RefLayerSupport::IsPermuteSupported(), RefLayerSupport::IsPooling2dSupported(), RefLayerSupport::IsPooling3dSupported(), RefLayerSupport::IsPreluSupported(), RefLayerSupport::IsQuantizeSupported(), RefLayerSupport::IsRankSupported(), RefLayerSupport::IsReduceSupported(), RefLayerSupport::IsReshapeSupported(), RefLayerSupport::IsResizeSupported(), RefLayerSupport::IsShapeSupported(), RefLayerSupport::IsSliceSupported(), RefLayerSupport::IsSoftmaxSupported(), RefLayerSupport::IsSpaceToBatchNdSupported(), RefLayerSupport::IsSpaceToDepthSupported(), RefLayerSupport::IsSplitterSupported(), RefLayerSupport::IsStackSupported(), RefLayerSupport::IsStridedSliceSupported(), RefLayerSupport::IsSubtractionSupported(), RefLayerSupport::IsTransposeConvolution2dSupported(), RefLayerSupport::IsTransposeSupported(), and RefLayerSupport::IsUnidirectionalSequenceLstmSupported().

39 {
40  bool supported = rule();
41  if (!supported && reason)
42  {
43  reasonIfUnsupported.value() += std::string(reason) + "\n"; // Append the reason on a new line
44  }
45  return supported;
46 }
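
A hedged usage sketch. Real call sites pass the rule structs defined alongside this function (TypeAnyOf, ShapesAreSameRank, and so on), but any callable returning bool works; the definition is restated locally here because LayerSupportRules.hpp is a backend-internal header:

#include <armnn/Optional.hpp>
#include <iostream>
#include <string>

namespace armnn
{
// Local stand-in mirroring the definition documented above.
template <typename F>
bool CheckSupportRule(F rule, Optional<std::string&> reasonIfUnsupported, const char* reason)
{
    bool supported = rule();
    if (!supported && reason)
    {
        reasonIfUnsupported.value() += std::string(reason) + "\n";
    }
    return supported;
}
} // namespace armnn

int main()
{
    std::string reason;
    armnn::Optional<std::string&> reasonRef(reason);

    const bool supported = armnn::CheckSupportRule([] { return false; },
                                                   reasonRef,
                                                   "Reference addition: input types not matching");
    std::cout << "supported: " << supported << "\nreason: " << reason;
    return 0;
}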

◆ ClAbsWorkloadValidate()

arm_compute::Status ClAbsWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 19 of file ClAbsWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLAbsLayer::validate(&aclInput, &aclOutput);
25 }
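
A hedged usage sketch: query whether an elementwise Abs of a given shape would validate on GpuAcc before building the workload. The forward declaration stands in for the CL backend's ClAbsWorkload header, and building against Arm NN and the Compute Library is assumed:

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <arm_compute/core/Error.h>
#include <iostream>

namespace armnn
{
// Declared in the CL backend's ClAbsWorkload header, as documented above.
arm_compute::Status ClAbsWorkloadValidate(const TensorInfo& input, const TensorInfo& output);
}

int main()
{
    using namespace armnn;

    TensorInfo input(TensorShape({ 1, 2, 2, 3 }), DataType::Float32);
    TensorInfo output(TensorShape({ 1, 2, 2, 3 }), DataType::Float32);

    const arm_compute::Status status = ClAbsWorkloadValidate(input, output);
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        std::cout << "Abs not supported on GpuAcc: " << status.error_description() << "\n";
    }
    return 0;
}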

◆ ClActivationWorkloadValidate()

arm_compute::Status ClActivationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ActivationDescriptor &  descriptor 
)

Definition at line 17 of file ClActivationWorkload.cpp.

Referenced by ClLayerSupport::IsActivationSupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  const arm_compute::ActivationLayerInfo activationLayerInfo =
25  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
26 
27  return arm_compute::CLActivationLayer::validate(&aclInput,
28  &aclOutput,
29  activationLayerInfo);
30 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClAdditionValidate()

arm_compute::Status ClAdditionValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 45 of file ClAdditionWorkload.cpp.

Referenced by ClLayerSupport::IsAdditionSupported(), and ClBackend::OptimizeSubgraphView().

49 {
50  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
51  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
52  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
53 
54  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
55  activationDescriptor);
56 
57  const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info,
58  &aclInput1Info,
59  &aclOutputInfo,
60  g_AclConvertPolicy,
61  activationInfo);
62 
63  return aclStatus;
64 }
Status
enumeration
Definition: Types.hpp:42
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClArgMinMaxWorkloadValidate()

arm_compute::Status ClArgMinMaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ArgMinMaxDescriptor &  descriptor 
)

Definition at line 31 of file ClArgMinMaxWorkload.cpp.

Referenced by ClLayerSupport::IsArgMinMaxSupported().

34 {
35  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
36  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
37 
38  auto numDims = input.GetNumDimensions();
39  auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
40  int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
41 
42  if (descriptor.m_Function == ArgMinMaxFunction::Max)
43  {
44  return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
45  arm_compute::ReductionOperation::ARG_IDX_MAX);
46  }
47  else
48  {
49  return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
50  arm_compute::ReductionOperation::ARG_IDX_MIN);
51  }
52 }
unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35

◆ ClBackendId()

constexpr const char* armnn::ClBackendId ( )

Definition at line 10 of file ClBackendId.hpp.

Referenced by ClBackend::GetIdStatic().

10 { return "GpuAcc"; }

◆ ClBatchNormalizationValidate()

arm_compute::Status ClBatchNormalizationValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TensorInfo &  mean,
const TensorInfo &  var,
const TensorInfo &  beta,
const TensorInfo &  gamma,
const BatchNormalizationDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file ClBatchNormalizationFloatWorkload.cpp.

Referenced by ClLayerSupport::IsBatchNormalizationSupported(), and ClBackend::OptimizeSubgraphView().

27 {
28  const arm_compute::TensorInfo aclInputInfo =
29  armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
30  const arm_compute::TensorInfo aclOutputInfo =
31  armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclMeanInfo =
33  armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
34  const arm_compute::TensorInfo aclVarInfo =
35  armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
36  const arm_compute::TensorInfo aclBetaInfo =
37  armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
38  const arm_compute::TensorInfo aclGammaInfo =
39  armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
40 
41  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
42  activationDescriptor);
43 
44  return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
45  &aclOutputInfo,
46  &aclMeanInfo,
47  &aclVarInfo,
48  &aclBetaInfo,
49  &aclGammaInfo,
50  descriptor.m_Eps,
51  activationInfo);
52 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClBatchToSpaceNdWorkloadValidate()

arm_compute::Status ClBatchToSpaceNdWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const BatchToSpaceNdDescriptor &  descriptor 
)

Definition at line 57 of file ClBatchToSpaceNdWorkload.cpp.

References BatchToSpaceNdDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsBatchToSpaceNdSupported().

60 {
61  DataLayout dataLayout = descriptor.m_DataLayout;
62  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
63 
64  // ArmNN blockShape is [H, W] Cl asks for W, H
65  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
66  int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
67 
68  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
69 
70  const arm_compute::Status aclStatus = arm_compute::CLBatchToSpaceLayer::validate(&aclInputInfo,
71  blockWidth,
72  blockHeight,
73  &aclOutputInfo);
74  return aclStatus;
75 }
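
A small caller-side sketch (illustrative; shapes and names are assumptions) showing the [H, W] ordering of m_BlockShape that the code above swaps into the W, H order CL expects:

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp and ClBatchToSpaceNdWorkload.hpp are included.
bool IsBatchToSpace2x2SupportedOnGpuAcc()
{
    // NHWC: a batch of 4 2x2 images rearranged into a single 4x4 image by a 2x2 block.
    armnn::TensorInfo input({ 4, 2, 2, 1 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 4, 4, 1 }, armnn::DataType::Float32);

    armnn::BatchToSpaceNdDescriptor descriptor;
    descriptor.m_BlockShape = { 2, 2 };               // [H, W] in Arm NN terms
    descriptor.m_Crops      = { { 0, 0 }, { 0, 0 } }; // no cropping
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    arm_compute::Status status = armnn::ClBatchToSpaceNdWorkloadValidate(input, output, descriptor);
    return status.error_code() == arm_compute::ErrorCode::OK;
}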

◆ ClCastValidate()

arm_compute::Status ClCastValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 20 of file ClCastWorkload.cpp.

Referenced by ClLayerSupport::IsCastSupported().

21 {
22  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  return arm_compute::CLCast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
26 }

◆ ClChannelShuffleValidate()

arm_compute::Status ClChannelShuffleValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ChannelShuffleDescriptor &  descriptor 
)

Definition at line 20 of file ClChannelShuffleWorkload.cpp.

Referenced by ClLayerSupport::IsChannelShuffleSupported().

23 {
24  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
26 
27  // In Arm NN and in NNAPI, channel shuffle implementation is datalayout agnostic and it has axis as a parameter.
28  // The channel shuffle Implementation for Neon is dependent on datalayout and does not have axis as a parameter,
29  // it only supports channel shuffle for 4D tensors in dimension C (1 or 3).
30  arm_compute::DataLayout aclDataLayout;
31  if (input.GetNumDimensions() == 4)
32  {
33  switch (descriptor.m_Axis)
34  {
35  case 1:
36  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
37  break;
38  case 3:
39  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
40  break;
41  default:
42  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
43  }
44  aclInputInfo.set_data_layout(aclDataLayout);
45  aclOutputInfo.set_data_layout(aclDataLayout);
46  return arm_compute::CLChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
47  }
48  else
49  {
50  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
51  }
52 }
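
A sketch of a call that satisfies the axis restriction above (values are illustrative; only a 4D tensor with the channel axis at position 1 or 3 validates successfully):

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp and ClChannelShuffleWorkload.hpp are included.
bool IsChannelShuffleSupportedOnGpuAcc()
{
    armnn::TensorInfo input({ 1, 8, 8, 12 }, armnn::DataType::Float32);
    armnn::TensorInfo output(input);

    armnn::ChannelShuffleDescriptor descriptor;
    descriptor.m_NumGroups = 3; // 12 channels shuffled as 3 groups of 4
    descriptor.m_Axis      = 3; // NHWC channel axis; axis 1 would imply NCHW

    arm_compute::Status status = armnn::ClChannelShuffleValidate(input, output, descriptor);
    return status.error_code() == arm_compute::ErrorCode::OK;
}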

◆ ClComparisonWorkloadValidate()

arm_compute::Status ClComparisonWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ComparisonDescriptor &  descriptor 
)

Definition at line 24 of file ClComparisonWorkload.cpp.

Referenced by ClLayerSupport::IsComparisonSupported().

28 {
29  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
30  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
31  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
32 
33  const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
34 
35  const arm_compute::Status aclStatus = arm_compute::CLComparison::validate(&aclInput0Info,
36  &aclInput1Info,
37  &aclOutputInfo,
38  comparisonOperation);
39  return aclStatus;
40 }

◆ ClConcatWorkloadValidate()

arm_compute::Status ClConcatWorkloadValidate ( const std::vector< const TensorInfo *> &  inputs,
const TensorInfo &  output,
const OriginsDescriptor &  descriptor 
)

Definition at line 27 of file ClConcatWorkload.cpp.

Referenced by ClLayerSupport::IsConcatSupported().

30 {
31  std::vector<arm_compute::TensorInfo> aclInputs;
32  for (const TensorInfo* input : inputs)
33  {
34  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
35  aclInputs.emplace_back(aclInputInfo);
36  }
37  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
38  std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
39  for (arm_compute::ITensorInfo& input : aclInputs)
40  {
41  aclInputPtrs.emplace_back(&input);
42  }
43 
44  size_t aclAxis = CalcAxis(descriptor);
45  return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
46 }

◆ ClConstantWorkloadValidate()

arm_compute::Status ClConstantWorkloadValidate ( const TensorInfo &  output)

Definition at line 18 of file ClConstantWorkload.cpp.

Referenced by ClLayerSupport::IsConstantSupported().

19 {
20  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  std::array<arm_compute::DataType,8> supportedTypes = {
23  arm_compute::DataType::F16,
24  arm_compute::DataType::F32,
25  arm_compute::DataType::QASYMM8,
26  arm_compute::DataType::QASYMM8_SIGNED,
27  arm_compute::DataType::QSYMM16,
28  arm_compute::DataType::QSYMM8,
29  arm_compute::DataType::QSYMM8_PER_CHANNEL,
30  arm_compute::DataType::S32
31  };
32  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
33 
34  if (it != end(supportedTypes))
35  {
36  return arm_compute::Status{};
37  }
38  else
39  {
40  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
41  }
42 }

◆ ClContextBufferHasIdentifier()

bool armnn::ClContextBufferHasIdentifier ( const void *  buf)
inline

Definition at line 152 of file ClContextSchema_generated.h.

References ClContextIdentifier().

152  {
153  return flatbuffers::BufferHasIdentifier(
154  buf, ClContextIdentifier());
155 }

◆ ClContextExtension()

const char* armnn::ClContextExtension ( )
inline

Definition at line 167 of file ClContextSchema_generated.h.

167  {
168  return "armnn";
169 }

◆ ClContextIdentifier()

const char* armnn::ClContextIdentifier ( )
inline

◆ ClConvertFp16ToFp32WorkloadValidate()

arm_compute::Status ClConvertFp16ToFp32WorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 44 of file ClConvertFp16ToFp32Workload.cpp.

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp16ToFp32Supported(), and ClConvertFp16ToFp32Workload::SupportsTensorHandleReplacement().

45 {
46  if (input.GetDataType() != DataType::Float16)
47  {
48  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float16");
49  }
50  if (output.GetDataType() != DataType::Float32)
51  {
52  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float32");
53  }
54 
55  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
56  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
59  &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
60 
61  return aclStatus;
62 }

◆ ClConvertFp32ToFp16WorkloadValidate()

arm_compute::Status ClConvertFp32ToFp16WorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 44 of file ClConvertFp32ToFp16Workload.cpp.

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp32ToFp16Supported(), and ClConvertFp32ToFp16Workload::SupportsTensorHandleReplacement().

45 {
46  if (input.GetDataType() != DataType::Float32)
47  {
48  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float32");
49  }
50  if (output.GetDataType() != DataType::Float16)
51  {
52  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float16");
53  }
54 
55  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
56  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
59  &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
60 
61  return aclStatus;
62 }

◆ ClConvolution2dWorkloadValidate()

arm_compute::Status ClConvolution2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Convolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 23 of file ClConvolution2dWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by ClLayerSupport::IsConvolution2dSupported(), and ClBackend::OptimizeSubgraphView().

30 {
31  // The arm_compute::CLConvolutionLayer supports both const and non const
32  // weights. However, in the case of non const weights we'd have to call
33  // prepare or configure for each inference which we're not setup to do just yet.
34  if (!weights.IsConstant())
35  {
36  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
37  "ArmNN ClConvolution2dWorkload does not support non constant weights."};
38  }
39 
40  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
41  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
42  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
43  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
44 
45  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
46  descriptor.m_DilationY);
47 
48  arm_compute::TensorInfo aclBiasesInfo;
49  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
50 
51  if (descriptor.m_BiasEnabled)
52  {
53  ARMNN_ASSERT(biases.has_value());
54  // Same for bias as weights. We don't currently support non const.
55  if (!biases.value().IsConstant())
56  {
57  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
58  "ArmNN ClConvolution2dWorkload does not support non constant bias."};
59  }
60  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
61  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
62  optionalAclBiasesInfo = &aclBiasesInfo;
63  }
64 
65  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
66 
67  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
68  activationDescriptor);
69 
70  return arm_compute::CLConvolutionLayer::validate(&aclInputInfo,
71  &aclWeightsInfo,
72  optionalAclBiasesInfo,
73  &aclOutputInfo,
74  layerInfo,
75  arm_compute::WeightsInfo(),
76  aclDilationInfo,
77  activationInfo,
78  isFastMathEnabled);
79 }
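
A caller-side sketch highlighting the constant-weights requirement enforced above. It is illustrative only: the shapes are assumptions, and the trailing nullptr relies on the fused-activation parameter being optional.

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp, armnn/Optional.hpp and ClConvolution2dWorkload.hpp are included.
bool IsConv3x3SupportedOnGpuAcc()
{
    armnn::TensorInfo input({ 1, 32, 32, 8 },  armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 32, 32, 16 }, armnn::DataType::Float32);
    armnn::TensorInfo weights({ 16, 3, 3, 8 }, armnn::DataType::Float32); // [O, H, W, I] for NHWC
    armnn::TensorInfo bias({ 16 }, armnn::DataType::Float32);
    weights.SetConstant(true); // non-constant weights are rejected by the validation above
    bias.SetConstant(true);

    armnn::Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;  descriptor.m_StrideY   = 1;
    descriptor.m_PadLeft = 1;  descriptor.m_PadRight  = 1;
    descriptor.m_PadTop  = 1;  descriptor.m_PadBottom = 1;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;

    arm_compute::Status status = armnn::ClConvolution2dWorkloadValidate(
        input, output, descriptor, weights, armnn::Optional<armnn::TensorInfo>(bias),
        false /* isFastMathEnabled */, nullptr /* no fused activation */);
    return status.error_code() == arm_compute::ErrorCode::OK;
}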

◆ ClConvolution3dWorkloadValidate()

arm_compute::Status ClConvolution3dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Convolution3dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 23 of file ClConvolution3dWorkload.cpp.

Referenced by ClLayerSupport::IsConvolution3dSupported().

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
33 
34  arm_compute::TensorInfo aclBiasesInfo;
35  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
36  if (descriptor.m_BiasEnabled)
37  {
38  ARMNN_ASSERT(biases.has_value());
39  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
40  optionalAclBiasesInfo = &aclBiasesInfo;
41  }
42 
43  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
44 
45  const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
46  isFastMathEnabled,
47  activationDescriptor);
48 
49  return arm_compute::CLConv3D::validate(&aclInputInfo,
50  &aclWeightsInfo,
51  optionalAclBiasesInfo,
52  &aclOutputInfo,
53  aclConv3DInfo);
54 }

◆ ClDepthToSpaceWorkloadValidate()

arm_compute::Status ClDepthToSpaceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const DepthToSpaceDescriptor &  descriptor 
)

Definition at line 22 of file ClDepthToSpaceWorkload.cpp.

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsDepthToSpaceSupported().

25 {
26  DataLayout dataLayout = descriptor.m_DataLayout;
27  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
28 
29  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
30 
31  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
32 
33  const arm_compute::Status aclStatus = arm_compute::CLDepthToSpaceLayer::validate(&aclInputInfo,
34  &aclOutputInfo,
35  blockSize);
36  return aclStatus;
37 }

◆ ClDepthwiseConvolutionWorkloadValidate()

arm_compute::Status ClDepthwiseConvolutionWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const DepthwiseConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 26 of file ClDepthwiseConvolutionWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by ClLayerSupport::IsDepthwiseConvolutionSupported(), ClLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and ClBackend::OptimizeSubgraphView().

32 {
33  // The CL implemented workload does support both const and non const
34  // weights. However, in the case of non const weights we'd have to call
35  // prepare or configure for each inference which we're not setup to do just yet.
36  if (!weights.IsConstant())
37  {
38  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
39  "ArmNN ClDepthwiseConv2dWorkload does not support non constant weights."};
40  }
41 
42  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
43  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
44 
45  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
46  //
47  // ACL format for weights for depthwise is:
48  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
49  // - [1, C, H, W] for [N, C, H, W] input/output layout
50  //
51  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
52  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
53  // so we do the permute here for the TensorInfo weights.
54  unsigned int aclDepthMultiplier;
55  TensorInfo weightsPermuted;
56  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
57 
58  // Convert the weights into the compute library format
59  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
60  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
61 
62  arm_compute::TensorInfo aclBiasesInfo;
63  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
64  if (descriptor.m_BiasEnabled)
65  {
66  ARMNN_ASSERT(biases.has_value());
67  // Same for bias as weights. We don't currently support non const.
68  if (!biases.value().IsConstant())
69  {
70  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
71  "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
72  }
73  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
74  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
75  optionalAclBiasesInfo = &aclBiasesInfo;
76  }
77 
78  const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
79  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
80  descriptor.m_DilationX,
81  descriptor.m_DilationY);
82 
83  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
84  activationDescriptor);
85 
86  return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
87  &aclWeightsInfo,
88  optionalAclBiasesInfo,
89  &aclOutputInfo,
90  aclPadStrideInfo,
91  aclDepthMultiplier,
92  activationInfo,
93  aclDilationInfo);
94 
95 }
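
A sketch of the [1, H, W, C] weight layout (where C = I * M) that Arm NN uses for depthwise weights regardless of the input data layout, as the comments above describe. The shapes are illustrative; with m_BiasEnabled left at its default, an EmptyOptional is passed for the bias.

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp, armnn/Optional.hpp and ClDepthwiseConvolutionWorkload.hpp are included.
bool IsDepthwise3x3SupportedOnGpuAcc()
{
    armnn::TensorInfo input({ 1, 16, 16, 8 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 16, 16, 8 }, armnn::DataType::Float32);
    armnn::TensorInfo weights({ 1, 3, 3, 8 }, armnn::DataType::Float32); // [1, H, W, I*M], M = 1
    weights.SetConstant(true);

    armnn::DepthwiseConvolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;  descriptor.m_StrideY   = 1;
    descriptor.m_PadLeft = 1;  descriptor.m_PadRight  = 1;
    descriptor.m_PadTop  = 1;  descriptor.m_PadBottom = 1;
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    arm_compute::Status status = armnn::ClDepthwiseConvolutionWorkloadValidate(
        input, output, descriptor, weights, armnn::EmptyOptional(), nullptr /* no fused activation */);
    return status.error_code() == arm_compute::ErrorCode::OK;
}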

◆ ClDequantizeWorkloadValidate()

arm_compute::Status ClDequantizeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 22 of file ClDequantizeWorkload.cpp.

Referenced by ClLayerSupport::IsDequantizeSupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
26 
27  return arm_compute::CLDequantizationLayer::validate(&aclInputInfo, &aclOutputInfo);
28 }

◆ ClDivisionWorkloadValidate()

arm_compute::Status ClDivisionWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 18 of file ClDivisionWorkload.cpp.

Referenced by ClLayerSupport::IsDivisionSupported(), and ClBackend::OptimizeSubgraphView().

22 {
23  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
28  activationDescriptor);
29 
30  return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput, activationInfo);
31 }

◆ ClExpWorkloadValidate()

arm_compute::Status ClExpWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file ClExpWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLExpLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClFloorWorkloadValidate()

arm_compute::Status ClFloorWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 14 of file ClFloorFloatWorkload.cpp.

Referenced by ClLayerSupport::IsFloorSupported().

16 {
17  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
18  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
19 
20  return arm_compute::CLFloor::validate(&aclInput, &aclOutput);
21 }

◆ ClFullyConnectedWorkloadValidate()

arm_compute::Status ClFullyConnectedWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
const FullyConnectedDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file ClFullyConnectedWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by ClLayerSupport::IsFullyConnectedSupported(), and ClBackend::OptimizeSubgraphView().

25 {
26  // The CL implemented workload does support both const and non const
27  // weights. However, in the case of non const weights we'd have to call
28  // prepare or configure for each inference which we're not setup to do just yet.
29  if (!weights.IsConstant())
30  {
31  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
32  "Arm NN ClFullyConnectedWorkload does not support non constant weights."};
33  }
34  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
35  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
36  arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
37  aclWeights.set_are_values_constant(weights.IsConstant());
38 
39  arm_compute::TensorInfo aclBiases;
40  arm_compute::TensorInfo* optionalAclBiases = nullptr;
41  if (descriptor.m_BiasEnabled)
42  {
43  ARMNN_ASSERT(biases.has_value());
44  // Same for bias as weights. We don't currently support non const.
45  if (!biases.value().IsConstant())
46  {
47  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
48  "Arm NN ClFullyConnectedWorkload does not support non constant bias."};
49  }
50  aclBiases = BuildArmComputeTensorInfo(biases.value());
51  aclBiases.set_are_values_constant(biases.value().IsConstant());
52  optionalAclBiases = &aclBiases;
53  }
54 
55  const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
56  ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
57  return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
58  &aclWeights,
59  optionalAclBiases,
60  &aclOutput,
61  fullyConnectedLayerInfo);
62 }

◆ ClGatherNdWorkloadValidate()

arm_compute::Status ClGatherNdWorkloadValidate ( const TensorInfo &  paramsInfo,
const TensorInfo &  indicesInfo,
const TensorInfo &  outputInfo 
)

Validates each stage of the GatherNd decomposition in turn (the Mul, ReduceSum, Gather and Reshape layers) and returns OK only if all of them are valid.

Definition at line 16 of file ClGatherNdWorkload.cpp.

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by ClLayerSupport::IsGatherNdSupported().

19 {
20  // Calculate ND, K, W, C.
21  std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
22 
23  /// Validate Mul
24  // Indices with shape { W, ND }
25  armnn::TensorInfo indices_W_ND_Info = indicesInfo;
26  indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
27  const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
28 
29  // Flattened coefficients with shape { ND }
30  armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
31  flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
32  const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
33 
34  // Output of Mul with shape { W, ND }
35  const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
36 
37  auto statusMul = arm_compute::CLPixelWiseMultiplication::validate(&aclIndicesInfo,
38  &aclFlattenedCoeffInfo,
39  &aclOutputMulInfo,
40  1.0f,
41  arm_compute::ConvertPolicy::WRAP,
42  arm_compute::RoundingPolicy::TO_ZERO,
43  arm_compute::ActivationLayerInfo());
44 
45  /// Validate ReduceSum
46  // Flattened indices with shape { W }
47  armnn::TensorInfo flattenedIndices_Info = indicesInfo;
48  flattenedIndices_Info.SetShape({ keyIndices["W"] });
49  const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
50 
51  const std::vector<unsigned int> armnnReduceAxes(1, 1);
52  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
53  indices_W_ND_Info.GetNumDimensions(),
54  armnnReduceAxes);
55 
56  auto statusReduceSum = arm_compute::CLReductionOperation::validate(&aclOutputMulInfo,
57  &aclFlattenedIndicesInfo,
58  static_cast<unsigned int>(coords[0]),
59  arm_compute::ReductionOperation::SUM,
60  false);
61 
62  /// Validate Gather
63  // Params with shape { K, C }
64  armnn::TensorInfo params_K_C_Info = paramsInfo;
65  params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
66  const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
67 
68  // Output of gather with shape { W, C }
69  armnn::TensorInfo outputGather_Info = outputInfo;
70  outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
71  const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
72 
73  auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
74  auto statusGather =
75  arm_compute::CLGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
76 
77  /// Validate Reshape
78  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
79 
80  auto statusReshape = arm_compute::CLReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
81 
82  /// Return OK if all the layers are valid
83  auto okCode = arm_compute::ErrorCode::OK;
84  if (statusMul.error_code() == okCode &&
85  statusReduceSum.error_code() == okCode &&
86  statusGather.error_code() == okCode &&
87  statusReshape.error_code() == okCode)
88  {
89  return arm_compute::Status(arm_compute::ErrorCode::OK,
90  "All GatherND layers validate status OK.");
91  }
92  else
93  {
94  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
95  "GatherND layer validate status failed.");
96  }
97 }
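
As a worked illustration of the decomposition validated above (the shapes and derived values are this example's own, not taken from the sources): for params of shape { 5, 5, 2 } and indices of shape { 3, 2 }, the last indices dimension gives ND = 2, so W = 3 index rows each select one of K = 5 * 5 = 25 flattened outer positions carrying C = 2 trailing elements, and the gathered result is reshaped to the { 3, 2 } output. A minimal call would look like:

// Assumes armnn/Tensor.hpp and ClGatherNdWorkload.hpp are included.
bool IsGatherNdSupportedOnGpuAcc()
{
    armnn::TensorInfo params({ 5, 5, 2 }, armnn::DataType::Float32);
    armnn::TensorInfo indices({ 3, 2 },   armnn::DataType::Signed32);
    armnn::TensorInfo output({ 3, 2 },    armnn::DataType::Float32);

    arm_compute::Status status = armnn::ClGatherNdWorkloadValidate(params, indices, output);
    return status.error_code() == arm_compute::ErrorCode::OK;
}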

◆ ClGatherWorkloadValidate()

arm_compute::Status ClGatherWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  indices,
const TensorInfo &  output,
const GatherDescriptor &  descriptor 
)

Definition at line 15 of file ClGatherWorkload.cpp.

Referenced by ClLayerSupport::IsGatherSupported().

19 {
20  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25 
26  return arm_compute::CLGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
27 }

◆ ClImportTensorHandleFactoryId()

constexpr const char* armnn::ClImportTensorHandleFactoryId ( )

Definition at line 15 of file ClImportTensorHandleFactory.hpp.

Referenced by ClImportTensorHandleFactory::GetIdStatic().

16 {
17  return "Arm/Cl/ImportTensorHandleFactory";
18 }

◆ ClInstanceNormalizationWorkloadValidate()

arm_compute::Status ClInstanceNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const InstanceNormalizationDescriptor &  descriptor 
)

Definition at line 18 of file ClInstanceNormalizationWorkload.cpp.

Referenced by ClLayerSupport::IsInstanceNormalizationSupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  return arm_compute::CLInstanceNormalizationLayer::validate(&aclInputInfo,
26  &aclOutputInfo,
27  descriptor.m_Gamma,
28  descriptor.m_Beta,
29  descriptor.m_Eps);
30 }

◆ ClL2NormalizationWorkloadValidate()

arm_compute::Status ClL2NormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const L2NormalizationDescriptor &  descriptor 
)

Definition at line 17 of file ClL2NormalizationFloatWorkload.cpp.

Referenced by ClLayerSupport::IsL2NormalizationSupported().

20 {
21  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
23 
24  int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
25 
26  return arm_compute::CLL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
27 }

◆ ClLogicalAndWorkloadValidate()

arm_compute::Status ClLogicalAndWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 20 of file ClLogicalAndWorkload.cpp.

Referenced by ClLayerSupport::IsLogicalBinarySupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  const arm_compute::Status aclStatus = arm_compute::CLLogicalAnd::validate(&aclInputInfo0,
29  &aclInputInfo1,
30  &aclOutputInfo);
31  return aclStatus;
32 }

◆ ClLogicalNotWorkloadValidate()

arm_compute::Status ClLogicalNotWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 20 of file ClLogicalNotWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::CLLogicalNot::validate(&aclInputInfo,
27  &aclOutputInfo);
28  return aclStatus;
29 }

◆ ClLogicalOrWorkloadValidate()

arm_compute::Status ClLogicalOrWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 20 of file ClLogicalOrWorkload.cpp.

Referenced by ClLayerSupport::IsLogicalBinarySupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  const arm_compute::Status aclStatus = arm_compute::CLLogicalOr::validate(&aclInputInfo0,
29  &aclInputInfo1,
30  &aclOutputInfo);
31  return aclStatus;
32 }

◆ ClLogSoftmaxWorkloadValidate()

arm_compute::Status ClLogSoftmaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const LogSoftmaxDescriptor &  descriptor 
)

Definition at line 17 of file ClLogSoftmaxWorkload.cpp.

Referenced by ClLayerSupport::IsLogSoftmaxSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25  return arm_compute::CLLogSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
26 }

◆ ClLogWorkloadValidate()

arm_compute::Status ClLogWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file ClLogWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLLogLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClLstmFloatWorkloadValidate()

arm_compute::Status ClLstmFloatWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  scratchBuffer,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const LstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 244 of file ClLstmFloatWorkload.cpp.

Referenced by ClLayerSupport::IsLstmSupported().

249 {
250  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
251 
252  // The inputs and the outputs
253  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
254  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
255  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
256  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
257  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
258  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
259  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
260 
261  // Basic parameters
262  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
263  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
264  const arm_compute::TensorInfo aclInputToCellWeightsInfo
265  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
266  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
267  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
268  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
270  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
272  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
274  const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
275  const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
276  const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
277 
278  arm_compute::TensorInfo aclInputToInputWeightsInfo;
279  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
280  arm_compute::TensorInfo aclCellToInputWeightsInfo;
281  arm_compute::TensorInfo aclInputGateBiasInfo;
282  arm_compute::TensorInfo aclProjectionWeightsInfo;
283  arm_compute::TensorInfo aclProjectionBiasInfo;
284  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
285  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
286  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
287  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
288  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
289  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
290 
291  if (!descriptor.m_CifgEnabled)
292  {
293  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
294  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
295 
296  if (paramsInfo.m_CellToInputWeights != nullptr)
297  {
298  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
299  }
300  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
301  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
302  paramsInfo.m_CellToInputWeights != nullptr ?
303  &aclCellToInputWeightsInfo: nullptr,
304  &aclInputGateBiasInfo);
305  }
306 
307  if (descriptor.m_ProjectionEnabled)
308  {
309  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
310 
311  if (paramsInfo.m_ProjectionBias != nullptr)
312  {
313  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
314  }
315  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
316  paramsInfo.m_ProjectionBias != nullptr ?
317  &aclProjectionBiasInfo: nullptr);
318  }
319 
320  if (descriptor.m_PeepholeEnabled)
321  {
322  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
323  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
324  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
325  }
326 
327  float cell_threshold = descriptor.m_ClippingThresCell;
328  float projection_threshold = descriptor.m_ClippingThresProj;
329 
330  // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations
331  arm_compute::ActivationLayerInfo activationLayerInfo =
332  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
333 
334  if (descriptor.m_LayerNormEnabled)
335  {
336  if (!descriptor.m_CifgEnabled)
337  {
338  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
339  }
340 
341  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
342 
343  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
344 
345  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
346 
347  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
348  nullptr : &aclInputLayerNormWeightsInfo,
349  &aclForgetLayerNormWeightsInfo,
350  &aclCellLayerNormWeightsInfo,
351  &aclOutputLayerNormWeightsInfo);
352  }
353 
354  return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo,
355  &aclInputToCellWeightsInfo,
356  &aclInputToOutputWeightsInfo,
357  &aclRecurrentToForgetWeightsInfo,
358  &aclRecurrentToCellWeightsInfo,
359  &aclRecurrentToOutputWeightsInfo,
360  &aclForgetGateBiasInfo,
361  &aclCellBiasInfo,
362  &aclOutputGateBiasInfo,
363  &aclOutputStateInInfo, &aclCellStateInInfo,
364  &aclScratchBufferInfo, &aclOutputStateOutInfo,
365  &aclCellStateOutInfo, &aclOutputInfo,
366  lstm_params_info, activationLayerInfo,
367  cell_threshold, projection_threshold);
368 }

◆ ClMaximumWorkloadValidate()

arm_compute::Status ClMaximumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 24 of file ClMaximumWorkload.cpp.

Referenced by ClLayerSupport::IsMaximumSupported().

27 {
28  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
29  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  const arm_compute::Status aclStatus = arm_compute::CLElementwiseMax::validate(&aclInput0Info,
33  &aclInput1Info,
34  &aclOutputInfo);
35 
36  return aclStatus;
37 }

◆ ClMeanValidate()

arm_compute::Status ClMeanValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const MeanDescriptor &  descriptor 
)

Definition at line 17 of file ClMeanWorkload.cpp.

Referenced by ClLayerSupport::IsMeanSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
25  input.GetNumDimensions(),
26  descriptor.m_Axis);
27 
28  return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
29 }

◆ ClMinimumWorkloadValidate()

arm_compute::Status ClMinimumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 24 of file ClMinimumWorkload.cpp.

Referenced by ClLayerSupport::IsMinimumSupported().

27 {
28  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
29  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  const arm_compute::Status aclStatus = arm_compute::CLElementwiseMin::validate(&aclInput0Info,
33  &aclInput1Info,
34  &aclOutputInfo);
35 
36  return aclStatus;
37 }

◆ ClMultiplicationWorkloadValidate()

arm_compute::Status ClMultiplicationWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 18 of file ClMultiplicationWorkload.cpp.

Referenced by ClLayerSupport::IsMultiplicationSupported(), and ClBackend::OptimizeSubgraphView().

22 {
23  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
28  arm_compute::ConvertPolicy::SATURATE :
29  arm_compute::ConvertPolicy::WRAP;
30 
31  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
32  activationDescriptor);
33 
34  // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
35  // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
36  // ignored for F32 tensors.
37  return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1,
38  &aclInput2,
39  &aclOutput,
40  1.0f,
41  convertPolicy,
42  arm_compute::RoundingPolicy::TO_ZERO,
43  activationInfo);
44 }
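
A sketch of the convert-policy selection described in the code above (illustrative): float inputs validate with ConvertPolicy::WRAP, while quantized inputs such as QAsymmU8 would select SATURATE.

// Assumes armnn/Tensor.hpp and ClMultiplicationWorkload.hpp are included.
bool IsElementwiseMulSupportedOnGpuAcc()
{
    armnn::TensorInfo input0({ 2, 4 }, armnn::DataType::Float32);
    armnn::TensorInfo input1({ 2, 4 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 2, 4 }, armnn::DataType::Float32);

    arm_compute::Status status =
        armnn::ClMultiplicationWorkloadValidate(input0, input1, output, nullptr /* no fused activation */);
    return status.error_code() == arm_compute::ErrorCode::OK;
}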

◆ ClNegWorkloadValidate()

arm_compute::Status ClNegWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file ClNegWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLNegLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClNormalizationWorkloadValidate()

arm_compute::Status ClNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const NormalizationDescriptor &  descriptor 
)

Definition at line 19 of file ClNormalizationFloatWorkload.cpp.

Referenced by ClLayerSupport::IsNormalizationSupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
27 
28  return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
29 }

◆ ClPadValidate()

arm_compute::Status ClPadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PadDescriptor &  descriptor 
)

Definition at line 62 of file ClPadWorkload.cpp.

Referenced by ClLayerSupport::IsPadSupported().

65 {
66  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
67  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
68 
69  std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
70 
71  std::reverse_copy(std::begin(descriptor.m_PadList),
72  std::end(descriptor.m_PadList),
73  std::begin(reversed_PadList));
74 
75  arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
76 
77  // PixelValue is currently unused when validating, but it's required to pass in PaddingMode.
78  arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
79  const arm_compute::Status aclStatus =
80  arm_compute::CLPadLayer::validate(&aclInputInfo,
81  &aclOutputInfo,
82  padList,
83  pixelValue,
84  ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
85 
86  return aclStatus;
87 }
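
A sketch of the PadList ordering handled above (illustrative): Arm NN stores one (before, after) pair per dimension with the outermost dimension first, and the reverse_copy turns that into the innermost-first ordering CLPadLayer expects.

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp and ClPadWorkload.hpp are included.
bool IsPadSupportedOnGpuAcc()
{
    // Pad one element on each side of H and W of an NHWC tensor.
    armnn::TensorInfo input({ 1, 3, 3, 1 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 5, 5, 1 }, armnn::DataType::Float32);

    armnn::PadDescriptor descriptor;
    descriptor.m_PadList  = { { 0, 0 }, { 1, 1 }, { 1, 1 }, { 0, 0 } }; // (before, after) per dimension
    descriptor.m_PadValue = 0.0f;

    arm_compute::Status status = armnn::ClPadValidate(input, output, descriptor);
    return status.error_code() == arm_compute::ErrorCode::OK;
}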

◆ ClPermuteWorkloadValidate()

arm_compute::Status ClPermuteWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PermuteDescriptor &  descriptor 
)

Definition at line 17 of file ClPermuteWorkload.cpp.

Referenced by ClLayerSupport::IsPermuteSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
24 
25  return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
26  armcomputetensorutils::BuildArmComputePermutationVector(mappings));
27 }

◆ ClPooling2dWorkloadValidate()

arm_compute::Status ClPooling2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 18 of file ClPooling2dWorkload.cpp.

Referenced by ClLayerSupport::IsPooling2dSupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
26 
27  return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
28 }

◆ ClPooling3dWorkloadValidate()

arm_compute::Status ClPooling3dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling3dDescriptor &  descriptor 
)

Definition at line 18 of file ClPooling3dWorkload.cpp.

Referenced by ClLayerSupport::IsPooling3dSupported().

21  {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
26 
27  return arm_compute::CLPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
28  }

◆ ClPreluWorkloadValidate()

arm_compute::Status ClPreluWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  alpha,
const TensorInfo &  output 
)

Definition at line 16 of file ClPreluWorkload.cpp.

Referenced by ClLayerSupport::IsPreluSupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLPReluLayer::validate(&aclInput,
25  &aclAlpha,
26  &aclOutput);
27 }

◆ ClQLstmWorkloadValidate()

arm_compute::Status ClQLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateOut,
const TensorInfo &  outputStateOut,
const TensorInfo &  output,
const QLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 247 of file ClQLstmWorkload.cpp.

Referenced by ClLayerSupport::IsQLstmSupported().

255 {
256  arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
257 
258  // Input/Output tensor info
259  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
260  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
261  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
262 
263  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
264  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
265  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
266 
267  // Mandatory tensor info
268  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
270  const arm_compute::TensorInfo aclInputToCellWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
272  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
274  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
275  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
276  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
277  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
278  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
279  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
280  const arm_compute::TensorInfo aclForgetGateBiasInfo
281  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
282  const arm_compute::TensorInfo aclCellBiasInfo
283  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
284  const arm_compute::TensorInfo aclOutputGateBiasInfo
285  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
286 
287  // Optional tensor info
288  arm_compute::TensorInfo aclInputToInputWeightsInfo;
289  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
290 
291  arm_compute::TensorInfo aclCellToInputWeightsInfo;
292  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
293  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
294 
295  arm_compute::TensorInfo aclInputGateBiasInfo;
296 
297  arm_compute::TensorInfo aclProjectionWeightsInfo;
298  arm_compute::TensorInfo aclProjectionBiasInfo;
299 
300  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
301  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
302  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
303  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
304 
305  // Create tensor info for optional params if they are enabled
306  if (descriptor.m_PeepholeEnabled)
307  {
308  if (!descriptor.m_CifgEnabled)
309  {
310  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
311  }
312 
313  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
314  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
315 
316  // Set peephole params info
317  aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
318  &aclCellToOutputWeightsInfo);
319  }
320 
321  if (descriptor.m_ProjectionEnabled)
322  {
323  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
324 
325  if (paramsInfo.m_ProjectionBias != nullptr)
326  {
327  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
328  }
329 
330  // Set projection params info
331  aclParamsInfo.set_projection_params(
332  &aclProjectionWeightsInfo,
333  paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
334  }
335 
336  if (descriptor.m_LayerNormEnabled)
337  {
338  if (!descriptor.m_CifgEnabled)
339  {
340  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
341  }
342 
343  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
344  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
345  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
346 
347  // Set layer norm params info
348  aclParamsInfo.set_layer_normalization_params(
349  paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
350  &aclForgetLayerNormWeightsInfo,
351  &aclCellLayerNormWeightsInfo,
352  &aclOutputLayerNormWeightsInfo);
353  }
354 
355  if (!descriptor.m_CifgEnabled)
356  {
357  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
358  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
359  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
360 
361  // Set CIFG params info
362  aclParamsInfo.set_cifg_params(
363  &aclInputToInputWeightsInfo,
364  &aclRecurrentToInputWeightsInfo,
365  paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
366  &aclInputGateBiasInfo);
367  }
368 
369  // Set scalar descriptor params
370  aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
371  aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
372  aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
373  aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
374  descriptor.m_ForgetIntermediateScale,
375  descriptor.m_CellIntermediateScale,
376  descriptor.m_OutputIntermediateScale);
377 
378  // QLSTM CL validate
379  return arm_compute::CLQLSTMLayer::validate(&aclInputInfo,
380  &aclInputToForgetWeightsInfo,
381  &aclInputToCellWeightsInfo,
382  &aclInputToOutputWeightsInfo,
383  &aclRecurrentToForgetWeightsInfo,
384  &aclRecurrentToCellWeightsInfo,
385  &aclRecurrentToOutputWeightsInfo,
386  &aclForgetGateBiasInfo,
387  &aclCellBiasInfo,
388  &aclOutputGateBiasInfo,
389  &aclCellStateInInfo,
390  &aclOutputStateInInfo,
391  &aclCellStateOutInfo,
392  &aclOutputStateOutInfo,
393  &aclOutputInfo,
394  aclParamsInfo);
395 }

◆ ClQuantizedLstmWorkloadValidate()

arm_compute::Status ClQuantizedLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo previousCellStateIn,
const TensorInfo previousOutputIn,
const TensorInfo cellStateOut,
const TensorInfo output,
const QuantizedLstmInputParamsInfo paramsInfo 
)

Definition at line 18 of file ClQuantizedLstmWorkload.cpp.

Referenced by ClLayerSupport::IsQuantizedLstmSupported().

22 {
23  // Inputs
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclPreviousCellStateInInfo = BuildArmComputeTensorInfo(previousCellStateIn);
26  const arm_compute::TensorInfo aclPreviousOutputInInfo = BuildArmComputeTensorInfo(previousOutputIn);
27 
28  // Outputs
29  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  // Basic parameters
33  const arm_compute::TensorInfo aclInputToInputWeightsInfo
34  = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
35  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
36  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
37  const arm_compute::TensorInfo aclInputToCellWeightsInfo
38  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
39  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
40  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
41  const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
42  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
43  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
44  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
45  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
46  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
47  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
48  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
49  const arm_compute::TensorInfo aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
50  const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
51  const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
52  const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
53 
54  return arm_compute::CLLSTMLayerQuantized::validate(&aclInputInfo, &aclInputToInputWeightsInfo,
55  &aclInputToForgetWeightsInfo, &aclInputToCellWeightsInfo,
56  &aclInputToOutputWeightsInfo, &aclRecurrentToInputWeightsInfo,
57  &aclRecurrentToForgetWeightsInfo, &aclRecurrentToCellWeightsInfo,
58  &aclRecurrentToOutputWeightsInfo, &aclInputGateBiasInfo,
59  &aclForgetGateBiasInfo, &aclCellBiasInfo, &aclOutputGateBiasInfo,
60  &aclPreviousCellStateInInfo, &aclPreviousOutputInInfo,
61  &aclCellStateOutInfo, &aclOutputInfo);
62 }

◆ ClQuantizeWorkloadValidate()

arm_compute::Status ClQuantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 22 of file ClQuantizeWorkload.cpp.

Referenced by ClLayerSupport::IsQuantizeSupported().

24 {
25  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  return arm_compute::CLQuantizationLayer::validate(&aclInputInfo,
29  &aclOutputInfo);
30 }

◆ ClReduceWorkloadValidate()

arm_compute::Status ClReduceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ReduceDescriptor descriptor 
)

Definition at line 18 of file ClReduceWorkload.cpp.

References ReduceDescriptor::m_vAxis.

Referenced by ClLayerSupport::IsReduceSupported().

21 {
22  if (descriptor.m_vAxis.size() == 1 || descriptor.m_vAxis.empty())
23  {
24  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
28  input.GetNumDimensions(),
29  descriptor.m_vAxis);
30 
31  return arm_compute::CLReductionOperation::validate(&aclInputInfo,
32  &aclOutputInfo,
33  static_cast<unsigned int>(coords[0]),
34  ConvertReductionOperationToAcl(descriptor),
35  descriptor.m_KeepDims);
36  }
37  else
38  {
39  // Validate layer if there are multiple axes.
40  arm_compute::Status status;
41  IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, descriptor, status);
42  return status;
43  }
44 }
#define IS_MULTI_AXES_REDUCE_SUPPORTED(func, input, desc, status)
Macro function to check whether a layer with multiple axes is supported on each backend.
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
arm_compute::ReductionOperation ConvertReductionOperationToAcl(const ReduceDescriptor &descriptor)
Status
enumeration
Definition: Types.hpp:42
arm_compute::Status ClReduceWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
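A minimal usage sketch follows; it is not taken from the ArmNN sources, and the tensor shapes and reduce operation are illustrative assumptions only. It shows how a caller such as a layer-support check might invoke this validate function for a single-axis mean reduction:

// Illustrative only: reduce a [1,3,4,4] Float32 tensor over its last axis, keeping dims.
armnn::TensorInfo inputInfo({1, 3, 4, 4}, armnn::DataType::Float32);
armnn::TensorInfo outputInfo({1, 3, 4, 1}, armnn::DataType::Float32);

armnn::ReduceDescriptor reduceDesc;
reduceDesc.m_vAxis = {3};
reduceDesc.m_KeepDims = true;
reduceDesc.m_ReduceOperation = armnn::ReduceOperation::Mean;

arm_compute::Status status = armnn::ClReduceWorkloadValidate(inputInfo, outputInfo, reduceDesc);
if (status.error_code() != arm_compute::ErrorCode::OK)
{
    // Configuration is not supported by the GPU backend; fall back or report the error.
}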

◆ ClReshapeWorkloadValidate()

arm_compute::Status ClReshapeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 15 of file ClReshapeWorkload.cpp.

Referenced by ClLayerSupport::IsReshapeSupported().

17 {
18  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
20 
21  return arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
22 }

◆ ClResizeWorkloadValidate()

arm_compute::Status ClResizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ResizeDescriptor descriptor 
)

Definition at line 22 of file ClResizeWorkload.cpp.

Referenced by ClLayerSupport::IsResizeSupported().

25 {
26  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
27  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
28 
29  arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
30  aclInputInfo.set_data_layout(aclDataLayout);
31  aclOutputInfo.set_data_layout(aclDataLayout);
32 
33  arm_compute::InterpolationPolicy aclInterpolationPolicy =
34  ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
35 
36  arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
37  arm_compute::SamplingPolicy::TOP_LEFT;
38 
39  return arm_compute::CLScale::validate(&aclInputInfo,
40  &aclOutputInfo,
41  arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
42  arm_compute::BorderMode::REPLICATE,
43  arm_compute::PixelValue(0.f),
44  samplingPolicy,
45  true,
46  descriptor.m_AlignCorners));
47 }
arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy(ResizeMethod resizeMethod)
DataLayout
Definition: Types.hpp:62

◆ ClRsqrtWorkloadValidate()

arm_compute::Status ClRsqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClRsqrtWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLRsqrtLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClSinWorkloadValidate()

arm_compute::Status ClSinWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClSinWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLSinLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClSliceWorkloadValidate()

arm_compute::Status ClSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SliceDescriptor descriptor 
)

Definition at line 18 of file ClSliceWorkload.cpp.

Referenced by ClLayerSupport::IsSliceSupported().

21 {
22  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  arm_compute::Coordinates starts;
26  arm_compute::Coordinates ends;
27 
28  std::tie(starts, ends) = SetClSliceData(descriptor.m_Begin, descriptor.m_Size);
29 
30  return arm_compute::CLSlice::validate(&aclInput, &aclOutput, starts, ends);
31 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
auto SetClSliceData(const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)

◆ ClSoftmaxWorkloadValidate()

arm_compute::Status ClSoftmaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SoftmaxDescriptor descriptor 
)

Definition at line 17 of file ClSoftmaxWorkload.cpp.

Referenced by ClLayerSupport::IsSoftmaxSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25  return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
26 }
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank...

◆ ClSpaceToBatchNdWorkloadValidate()

arm_compute::Status ClSpaceToBatchNdWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToBatchNdDescriptor descriptor 
)

Definition at line 23 of file ClSpaceToBatchNdWorkload.cpp.

Referenced by ClLayerSupport::IsSpaceToBatchNdSupported().

26 {
27  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
28  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
29 
30  // ArmNN blockShape is [H, W] Cl asks for W, H
31  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
32  int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
33 
34  arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(
35  descriptor.m_PadList[1].first, descriptor.m_PadList[0].first);
36  arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(
37  descriptor.m_PadList[1].second, descriptor.m_PadList[0].second);
38 
39  return arm_compute::CLSpaceToBatchLayer::validate(&aclInputInfo,
40  blockWidth,
41  blockHeight,
42  paddingLeftTop,
43  paddingRightBottom,
44  &aclOutputInfo);
45 }
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
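As the comment in the listing notes, ArmNN stores the block shape as [H, W] while CL expects width first. The following sketch (descriptor values are illustrative assumptions, not from the ArmNN sources) traces how the descriptor fields map onto the CL call:

// Illustrative only: ArmNN descriptor with blockShape [H, W] = {2, 3} and per-dimension padding.
armnn::SpaceToBatchNdDescriptor desc;
desc.m_BlockShape = {2, 3};                  // H = 2, W = 3
desc.m_PadList    = {{1, 1}, {0, 2}};        // [H pads, W pads]
desc.m_DataLayout = armnn::DataLayout::NHWC;

// Inside the validate function this becomes:
//   blockHeight = 2, blockWidth = 3
//   paddingLeftTop     = Size2D(W begin = 0, H begin = 1)
//   paddingRightBottom = Size2D(W end   = 2, H end   = 1)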

◆ ClSpaceToDepthWorkloadValidate()

arm_compute::Status ClSpaceToDepthWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToDepthDescriptor descriptor 
)

Definition at line 54 of file ClSpaceToDepthWorkload.cpp.

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsSpaceToDepthSupported().

57 {
58  DataLayout dataLayout = descriptor.m_DataLayout;
59  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
60 
61  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
62 
63  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
64 
65  const arm_compute::Status aclStatus = arm_compute::CLSpaceToDepthLayer::validate(&aclInputInfo,
66  &aclOutputInfo,
67  blockSize);
68  return aclStatus;
69 }
DataLayout
Definition: Types.hpp:62
Status
enumeration
Definition: Types.hpp:42
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35

◆ ClSplitterWorkloadValidate()

arm_compute::Status ClSplitterWorkloadValidate ( const TensorInfo input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
unsigned int  splitAxis 
)

Definition at line 31 of file ClSplitterWorkload.cpp.

Referenced by ClLayerSupport::IsSplitterSupported().

34 {
35  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
36 
37  size_t numOutputs = outputs.size();
38 
39  std::vector<arm_compute::TensorInfo> aclOutputs;
40  aclOutputs.reserve(numOutputs);
41 
42  std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
43  aclOutputPtr.reserve(numOutputs);
44 
45  for (size_t i = 0u; i < outputs.size(); ++i)
46  {
47  aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
48  aclOutputPtr.emplace_back(&aclOutputs.back());
49  }
50 
51  unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
52  return arm_compute::CLSplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
53 }

◆ ClSqrtWorkloadValidate()

arm_compute::Status ClSqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file ClSqrtWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  ActivationDescriptor descriptor;
25  descriptor.m_Function = ActivationFunction::Sqrt;
26  const arm_compute::ActivationLayerInfo activationLayerInfo =
27  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
28 
29  return arm_compute::CLActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
30 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClStackWorkloadValidate()

arm_compute::Status ClStackWorkloadValidate ( const std::vector< const TensorInfo *> &  inputs,
const TensorInfo output,
const StackDescriptor descriptor 
)

Definition at line 29 of file ClStackWorkload.cpp.

Referenced by ClLayerSupport::IsStackSupported().

32 {
33  std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
34  arm_compute::TensorInfo aclInputInfo;
35  for (const TensorInfo* input : inputs)
36  {
37  aclInputInfo = BuildArmComputeTensorInfo(*input);
38  aclInputPtrs.emplace_back(&aclInputInfo);
39  }
40  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
41 
42  int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
43 
44  return arm_compute::CLStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
45 }

◆ ClStridedSliceWorkloadValidate()

arm_compute::Status ClStridedSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const StridedSliceDescriptor descriptor 
)

Definition at line 27 of file ClStridedSliceWorkload.cpp.

Referenced by ClLayerSupport::IsStridedSliceSupported().

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33 
34  arm_compute::Coordinates starts;
35  arm_compute::Coordinates ends;
36  arm_compute::Coordinates strides;
37 
38  std::tie(starts, ends, strides) = SetClStridedSliceData(descriptor.m_Begin, descriptor.m_End, descriptor.m_Stride);
39 
40  auto numDimensions = armnn::numeric_cast<int>(input.GetNumDimensions());
41  int32_t begin_mask = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
42  int32_t end_mask = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
43  int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
44 
45  return arm_compute::CLStridedSlice::validate(&aclInputInfo,
46  &aclOutputInfo,
47  starts,
48  ends,
49  strides,
50  begin_mask,
51  end_mask,
52  shrink_axis_mask);
53 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
auto SetClStridedSliceData(const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)

◆ ClSubtractionValidate()

arm_compute::Status ClSubtractionValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 46 of file ClSubtractionWorkload.cpp.

Referenced by ClLayerSupport::IsSubtractionSupported(), and ClBackend::OptimizeSubgraphView().

50 {
51  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
52  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
53  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
54 
55  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
56  activationDescriptor);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info,
59  &aclInput1Info,
60  &aclOutputInfo,
61  g_AclConvertPolicy,
62  activationInfo);
63 
64  return aclStatus;
65 }
Status
enumeration
Definition: Types.hpp:42
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClTensorHandleFactoryId()

constexpr const char* armnn::ClTensorHandleFactoryId ( )

Definition at line 15 of file ClTensorHandleFactory.hpp.

Referenced by ClTensorHandleFactory::GetIdStatic().

16 {
17  return "Arm/Cl/TensorHandleFactory";
18 }

◆ ClTransposeConvolution2dWorkloadValidate()

arm_compute::Status ClTransposeConvolution2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 26 of file ClTransposeConvolution2dWorkload.cpp.

Referenced by ClLayerSupport::IsTransposeConvolution2dSupported().

31 {
32  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
35 
36  arm_compute::TensorInfo aclBiasesInfo;
37  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
38 
39  if (descriptor.m_BiasEnabled)
40  {
41  ARMNN_ASSERT(biases.has_value());
42 
43  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
44  optionalAclBiasesInfo = &aclBiasesInfo;
45  }
46 
47  arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(descriptor);
48 
49  return arm_compute::CLDeconvolutionLayer::validate(&aclInputInfo,
50  &aclWeightsInfo,
51  optionalAclBiasesInfo,
52  &aclOutputInfo,
53  padStrideInfo);
54 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ ClTransposeWorkloadValidate()

arm_compute::Status ClTransposeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeDescriptor descriptor 
)

Definition at line 17 of file ClTransposeWorkload.cpp.

Referenced by ClLayerSupport::IsTransposeSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
24 
25  return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
26  armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
27 }

◆ ClUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo output,
const Optional< TensorInfo > &  hiddenStateOutput,
const Optional< TensorInfo > &  cellStateOutput,
const UnidirectionalSequenceLstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 508 of file ClUnidirectionalSequenceLstmFloatWorkload.cpp.

References TensorInfo::GetShape(), IgnoreUnused(), and LstmDescriptor::m_TimeMajor.

Referenced by ClLayerSupport::IsUnidirectionalSequenceLstmSupported().

516 {
517  IgnoreUnused(hiddenStateOutput, cellStateOutput);
518 
519  TensorShape inputLayerShape = input.GetShape();
520  TensorShape outputLayerShape = outputStateIn.GetShape();
521 
522  unsigned int maxTime = descriptor.m_TimeMajor?inputLayerShape[0]:inputLayerShape[1];
523  unsigned int batchSize = descriptor.m_TimeMajor?inputLayerShape[1]:inputLayerShape[0];
524  unsigned int inputSize = inputLayerShape[2];
525  unsigned int outputSize = outputLayerShape[2];
526 
527  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
528  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
529 
530  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
531  "Permute1 status");
532  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
533  "Split status");
534  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
535  "LSTM status");
536  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
537  "Concat status");
538  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
539  "Permute2 status");
540 
541  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
542  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
543 
544  //
545  // Permute validate
546  //
547  TensorInfo permuteOutInfo = TensorInfo(input);
548  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
549  if (!descriptor.m_TimeMajor)
550  {
551  statusPermute1 = arm_compute::CLPermute::validate(&aclInputInfo,
552  &aclPermuteOutInfo,
553  arm_compute::PermutationVector(0U, 2U, 1U));
554  }
555 
556  //
557  // Split and Concat Tensors validate
558  //
559  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
560  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
561  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
562  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
563  splitterOutputsTensorInfos.reserve(maxTime);
564  concatInputsTensorInfos.reserve(maxTime);
565  for (unsigned int i = 0; i < maxTime; ++i)
566  {
567  arm_compute::TensorInfo splitter_out;
568  arm_compute::TensorInfo concat_in;
569 
570  auto splitterTensorInfo = TensorInfo(input);
571  auto concatTensorInfo = TensorInfo(output);
572  splitterTensorInfo.SetShape({batchSize, inputSize});
573  concatTensorInfo.SetShape({batchSize, outputSize});
574 
575  arm_compute::TensorInfo aclSplitterTensorInfo
576  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
577  arm_compute::TensorInfo aclConcatTensorInfo
578  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
579 
580  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
581  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
582  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
583  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
584  }
585 
586  //
587  // Split validate
588  //
589  unsigned int numberDimensions = 3;
590  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
591  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
592 
593  if (maxTime != 1) // ACL split does not work with only one element to split.
594  {
595  if (!descriptor.m_TimeMajor)
596  {
597  statusSplit = arm_compute::CLSplit::validate(&aclPermuteOutInfo,
598  splitterOutputsTensorInfosPtr,
599  aclAxisSplit);
600  }
601  else
602  {
603  statusSplit = arm_compute::CLSplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
604  }
605  }
606 
607  //
608  // LSTM validate
609  //
610 
611  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
612 
613  const TensorInfo& scratchBuffer = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
614  const TensorInfo& outputStateOut = TensorInfo(outputStateIn.GetShape(), input.GetDataType());
615  const TensorInfo& cellStateOut = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
616 
617  // The inputs and outputs
618  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
619  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
620  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
621  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
622  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
623 
624  // Basic parameters
625  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
626  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
627  const arm_compute::TensorInfo aclInputToCellWeightsInfo
628  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
629  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
630  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
631  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
632  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
633  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
634  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
635  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
636  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
637  const arm_compute::TensorInfo aclForgetGateBiasInfo
638  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
639  const arm_compute::TensorInfo aclCellBiasInfo
640  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
641  const arm_compute::TensorInfo aclOutputGateBiasInfo
642  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
643 
644  arm_compute::TensorInfo aclInputToInputWeightsInfo;
645  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
646  arm_compute::TensorInfo aclCellToInputWeightsInfo;
647  arm_compute::TensorInfo aclInputGateBiasInfo;
648  arm_compute::TensorInfo aclProjectionWeightsInfo;
649  arm_compute::TensorInfo aclProjectionBiasInfo;
650  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
651  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
652 
653  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
654  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
655  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
656  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
657 
658 
659  if (!descriptor.m_CifgEnabled)
660  {
661  if (descriptor.m_PeepholeEnabled)
662  {
663  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
664  }
665  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
666  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
667  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
668 
669  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
670  &aclRecurrentToInputWeightsInfo,
671  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
672  &aclInputGateBiasInfo);
673  }
674 
675  if (descriptor.m_ProjectionEnabled)
676  {
677  if (paramsInfo.m_ProjectionBias != nullptr)
678  {
679  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
680  }
681  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
682 
683  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
684  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
685  }
686 
687  if (descriptor.m_PeepholeEnabled)
688  {
689  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
690  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
691 
692  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
693  }
694 
695  if (descriptor.m_LayerNormEnabled)
696  {
697  if (!descriptor.m_CifgEnabled)
698  {
699  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
700  }
701  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
702  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
703  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
704 
705  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
706  &aclInputLayerNormWeightsInfo,
707  &aclForgetLayerNormWeightsInfo,
708  &aclCellLayerNormWeightsInfo,
709  &aclOutputLayerNormWeightsInfo);
710  }
711 
712  // Needs to be set to a negative threshold to be compatible with ACL
713  float cell_threshold = descriptor.m_ClippingThresCell;
714  float projection_threshold = descriptor.m_ClippingThresProj;
715 
716  arm_compute::ActivationLayerInfo activationLayerInfo =
717  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
718 
719  for (unsigned int i = 0; i != maxTime; ++i)
720  {
721 
722  // Set LSTM input and output ITensors depending on:
723  // input format (timeMajor) & number of LSTM batches (maxTime).
724  arm_compute::ITensorInfo* outputLSTM;
725  arm_compute::ITensorInfo* inputLSTM;
726  // If there is only one LSTM time major batch, we will not concat OR permute.
727  // Set input of LSTM to be first input ITensor.
728  // Set output of LSTM to be final output ITensor.
729  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
730  if (maxTime == 1 && descriptor.m_TimeMajor)
731  {
732  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
733  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
734  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
735  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
736  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
737  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
738  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
739  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
740  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
741  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
742  }
743  // If there is only one LSTM batch major batch, we will not concat, only permute.
744  // Set input of LSTM to be output of initial permute.
745  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
746  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
747  else if (maxTime == 1 && !descriptor.m_TimeMajor)
748  {
749  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
750  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
751  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
752  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
753  inputLSTM = &aclPermuteOutInfo;
754  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
755  }
756  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
757  else
758  {
759  inputLSTM = splitterOutputsTensorInfosPtr[i];
760  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
761  }
762 
763  statusLSTM = arm_compute::CLLSTMLayer::validate(inputLSTM,
764  &aclInputToForgetWeightsInfo,
765  &aclInputToCellWeightsInfo,
766  &aclInputToOutputWeightsInfo,
767  &aclRecurrentToForgetWeightsInfo,
768  &aclRecurrentToCellWeightsInfo,
769  &aclRecurrentToOutputWeightsInfo,
770  &aclForgetGateBiasInfo,
771  &aclCellBiasInfo,
772  &aclOutputGateBiasInfo,
773  &aclOutputStateInInfo,
774  &aclCellStateInInfo,
775  &aclScratchBufferInfo,
776  &aclOutputStateOutInfo,
777  &aclCellStateOutInfo,
778  outputLSTM,
779  lstm_params_info,
780  activationLayerInfo,
781  cell_threshold,
782  projection_threshold);
783 
784  if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
785  {
786  break;
787  }
788  }
789 
790  //
791  // Concat validate
792  //
793 
794  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
795  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
796  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
797  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
798 
799  TensorInfo concatOuputTensorInfo = TensorInfo(output);
800  concatOuputTensorInfo.SetShape(timeMajorShapeOutput);
801  arm_compute::TensorInfo aclConcatOuputTensorInfo= BuildArmComputeTensorInfo(concatOuputTensorInfo);
802 
803  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
804  {
805  for (unsigned int i = 0; i < maxTime; ++i)
806  {
807  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
808  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
809  }
810 
811  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
812  if (!descriptor.m_TimeMajor)
813  {
814  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
815  &aclConcatOuputTensorInfo,
816  aclAxisConcat);
817  }
818  else
819  {
820  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
821  &aclOutputInfo,
822  aclAxisConcat);
823  }
824  }
825  // If only one LSTM batch, we do not concat and/or permute.
826  // Must ensure final output info is expanded to correct batch major dimensions.
827  else
828  {
829  if (!descriptor.m_TimeMajor)
830  {
831  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
832  BuildArmComputeTensorShape(shapeExpandBatchMajor));
833  }
834  else
835  {
836  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
837  BuildArmComputeTensorShape(shapeExpandTimeMajor));
838  }
839  }
840  //
841  // Permute validate
842  //
843  if (!descriptor.m_TimeMajor)
844  {
845  // Output now time major. Permute output back to batch major.
846  if (maxTime != 1)
847  {
848  statusPermute2 = arm_compute::CLPermute::validate(&aclConcatOuputTensorInfo,
849  &aclOutputInfo,
850  arm_compute::PermutationVector(0U, 2U, 1U));
851  }
852  else
853  {
854  statusPermute2 = arm_compute::CLPermute::validate(concatInputsTensorInfosPtr[0],
855  &aclOutputInfo,
856  arm_compute::PermutationVector(0U, 2U, 1U));
857  }
858  }
859 
860  auto okCode = arm_compute::ErrorCode::OK;
861  if (statusPermute1.error_code() == okCode &&
862  statusSplit.error_code() == okCode &&
863  statusLSTM .error_code() == okCode &&
864  statusConcat.error_code() == okCode &&
865  statusPermute2.error_code() == okCode)
866  {
867  return arm_compute::Status(arm_compute::ErrorCode::OK,
868  "All Unidirectional Sequence LSTM layer validate status OK.");
869  }
870  else
871  {
872  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
873  "Unidirectional Sequence LSTM layer validate status failed.");
874  }
875 }
void IgnoreUnused(Ts &&...)
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunction)
Status
enumeration
Definition: Types.hpp:42
armnn::TensorShape GetTensorShape(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout)
Definition: TensorUtils.cpp:19

◆ Combine() [1/2]

MemorySourceFlags armnn::Combine ( Arg  sourceA,
Arg  sourceB 
)

Definition at line 30 of file MemorySources.hpp.

Referenced by Combine().

31 {
32  return static_cast<MemorySourceFlags>(sourceA) | static_cast<MemorySourceFlags>(sourceB);
33 }
unsigned int MemorySourceFlags

◆ Combine() [2/2]

MemorySourceFlags armnn::Combine ( Arg  source,
Args...  rest 
)

Definition at line 36 of file MemorySources.hpp.

References Combine().

37 {
38  return static_cast<MemorySourceFlags>(source) | Combine(rest...);
39 }
MemorySourceFlags Combine(Arg source, Args... rest)
unsigned int MemorySourceFlags
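A minimal sketch of how the two overloads compose; the specific MemorySource values are only an example:

// Illustrative only: OR three MemorySource values into one MemorySourceFlags bitmask.
armnn::MemorySourceFlags flags = armnn::Combine(armnn::MemorySource::Malloc,
                                                armnn::MemorySource::DmaBuf,
                                                armnn::MemorySource::DmaBufProtected);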

◆ ComputeAclAxis()

int armnn::ComputeAclAxis ( const int &  armnnAxis,
const armnn::TensorInfo tensor 
)
inline

Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank)

Definition at line 264 of file ArmComputeUtils.hpp.

References ARMNN_ASSERT, and TensorInfo::GetNumDimensions().

Referenced by ClGatherWorkload::ClGatherWorkload(), ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(), ClSoftmaxWorkload::ClSoftmaxWorkload(), NeonGatherWorkload::NeonGatherWorkload(), NeonLogSoftmaxWorkload::NeonLogSoftmaxWorkload(), and NeonSoftmaxWorkload::NeonSoftmaxWorkload().

265 {
266  int rank = static_cast<int>(tensor.GetNumDimensions());
267 
268  ARMNN_ASSERT(rank != 0);
269  ARMNN_ASSERT((-1 * rank) <= armnnAxis);
270  ARMNN_ASSERT(armnnAxis < rank);
271 
272  int sign = (armnnAxis < 0) ? -1 : 1;
273  int aclAxis = sign * rank - 1 - armnnAxis;
274 
275  return aclAxis;
276 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
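A short worked example of the conversion above (the rank-4 tensor is an assumption for illustration):

// Illustrative only: for a rank-4 tensor, aclAxis = sign * rank - 1 - armnnAxis.
//   armnnAxis =  0  ->  1 * 4 - 1 - 0    =  3   (leftmost ArmNN dim is the rightmost ACL dim)
//   armnnAxis =  1  ->  1 * 4 - 1 - 1    =  2
//   armnnAxis = -1  -> -1 * 4 - 1 - (-1) = -4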

◆ ComputeConv3DInfo() [1/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo ( const armnn::Convolution3dDescriptor  descriptor,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)
inline

Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor.

Definition at line 293 of file ArmComputeUtils.hpp.

References ConvertActivationDescriptorToAclActivationLayerInfo(), Convolution3dDescriptor::m_DilationX, Convolution3dDescriptor::m_DilationY, Convolution3dDescriptor::m_DilationZ, Convolution3dDescriptor::m_PadBack, Convolution3dDescriptor::m_PadBottom, Convolution3dDescriptor::m_PadFront, Convolution3dDescriptor::m_PadLeft, Convolution3dDescriptor::m_PadRight, Convolution3dDescriptor::m_PadTop, Convolution3dDescriptor::m_StrideX, Convolution3dDescriptor::m_StrideY, and Convolution3dDescriptor::m_StrideZ.

296 {
297  const arm_compute::Size3D stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
298  const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
299  descriptor.m_PadTop, descriptor.m_PadBottom,
300  descriptor.m_PadFront, descriptor.m_PadBack};
301  const arm_compute::Size3D dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
302 
303  const arm_compute::ActivationLayerInfo activationInfo =
304  ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
305  const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
306 
307  return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
308 }
uint32_t m_PadBack
Padding back value in the depth dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_PadBottom
Padding bottom value in the height dimension.
uint32_t m_DilationX
Dilation along x axis.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
uint32_t m_PadFront
Padding front value in the depth dimension.
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_PadRight
Padding right value in the width dimension.
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)
uint32_t m_PadTop
Padding top value in the height dimension.
uint32_t m_DilationZ
Dilation along z axis.
uint32_t m_StrideZ
Stride value when proceeding through input for the depth dimension.
uint32_t m_DilationY
Dilation along y axis.

◆ ComputeConv3DInfo() [2/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo ( const armnn::Convolution3dQueueDescriptor  queueDescriptor,
bool  isFastMathEnabled 
)
inline

Definition at line 310 of file ArmComputeUtils.hpp.

References ConvertAdditionalInfoToAclActivationLayerInfo(), QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Convolution3dDescriptor::m_StrideX.

312 {
313  auto descriptor = queueDescriptor.m_Parameters;
314  const arm_compute::Size3D stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
315  const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
316  descriptor.m_PadTop, descriptor.m_PadBottom,
317  descriptor.m_PadFront, descriptor.m_PadBack};
318  const arm_compute::Size3D dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
319 
320  const arm_compute::ActivationLayerInfo activationInfo =
321  ConvertAdditionalInfoToAclActivationLayerInfo(queueDescriptor);
322  const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
323 
324  return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
325 }
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor &queueDescriptor)

◆ ComputePositiveAxis()

unsigned int armnn::ComputePositiveAxis ( const int &  axis,
const armnn::TensorInfo tensor 
)
inline

Function to convert axis to its positive equivalent value.

[-rank, rank) --> [0, rank)

Definition at line 280 of file ArmComputeUtils.hpp.

References ARMNN_ASSERT, and TensorInfo::GetNumDimensions().

281 {
282  int rank = static_cast<int>(tensor.GetNumDimensions());
283 
284  ARMNN_ASSERT(rank != 0);
285  ARMNN_ASSERT((-1 * rank) <= axis);
286  ARMNN_ASSERT(axis < rank);
287 
288  int positiveAxis = (axis < 0) ? rank + axis : axis;
289  return static_cast<unsigned int>(positiveAxis);
290 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
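For example, for an assumed rank-4 tensor:

// Illustrative only: mapping [-rank, rank) to [0, rank) for rank = 4.
//   axis =  2  ->  2
//   axis = -1  ->  4 + (-1) = 3
//   axis = -4  ->  4 + (-4) = 0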

◆ ComputeReductionTensorShape()

const TensorInfo armnn::ComputeReductionTensorShape ( const armnn::TensorInfo input,
const std::vector< uint32_t > &  vAxis,
const bool  keepDims 
)
inline

Function to compute the output tensor shape based on the axes and if keepDims is set.

Definition at line 352 of file ArmComputeUtils.hpp.

References TensorInfo::GetNumDimensions(), and numeric_cast().

Referenced by ChainReduceLayers().

355 {
356  auto reducedTensorInfo = input;
357  unsigned int rank = reducedTensorInfo.GetNumDimensions();
358  unsigned int outputRank = 0;
359  // Calculate output dimension
360  if (keepDims)
361  {
362  outputRank = rank;
363  }
364  else if (vAxis.empty())
365  {
366  outputRank = 1;
367  }
368  else if (vAxis.size() > reducedTensorInfo.GetNumDimensions())
369  {
370  throw LayerValidationException("ReduceLayer: Dimensions to reduce can not be bigger than input dimensions");
371  }
372  else
373  {
374  outputRank = reducedTensorInfo.GetNumDimensions() - armnn::numeric_cast<unsigned int>(vAxis.size());
375  if (outputRank == 0)
376  {
377  outputRank = 1;
378  }
379  }
380  std::vector<unsigned int> dimSizes(outputRank, 1);
381  if (!vAxis.empty())
382  {
383  // Skip the dimension that has been reduced unless keepDims is true.
384  unsigned int outputIndex = 0;
385  for (unsigned int i = 0; i < reducedTensorInfo.GetNumDimensions(); ++i)
386  {
387  if (std::find(vAxis.begin(), vAxis.end(), i) == vAxis.end())
388  {
389  dimSizes[outputIndex] = armnn::numeric_cast<unsigned int>(reducedTensorInfo.GetShape()[i]);
390  ++outputIndex;
391  }
392  else if (keepDims)
393  {
394  dimSizes[outputIndex] = 1;
395  ++outputIndex;
396  }
397  }
398  }
399  const TensorShape inferredShape = TensorShape(outputRank, dimSizes.data());
400  reducedTensorInfo.SetShape(inferredShape);
401  return reducedTensorInfo;
402 }
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
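A short worked example of the shape inference (the input shape is chosen purely for illustration):

// Illustrative only: input shape [2, 3, 4], vAxis = {1}.
//   keepDims = true  -> output shape [2, 1, 4]  (reduced axis kept as size 1)
//   keepDims = false -> output shape [2, 4]     (reduced axis removed)
//   vAxis empty, keepDims = false -> output shape [1] (all dimensions reduced)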

◆ ComputeSoftmaxAclAxis()

T armnn::ComputeSoftmaxAclAxis ( const SoftmaxDescriptor softmaxDesc,
const armnn::TensorInfo tensor 
)
inline

Definition at line 225 of file ArmComputeUtils.hpp.

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), and SoftmaxDescriptor::m_Axis.

226 {
227  // Detect the Android default value of -1 and return the ACL default value of 0.
228  if (softmaxDesc.m_Axis == -1)
229  {
230  return 0;
231  }
232 
233  unsigned int dim = tensor.GetNumDimensions();
234 
235  ARMNN_ASSERT(dim != 0);
236 
237  // Currently ArmNN supports axis 1.
238  auto aclAxis = (static_cast<T>(dim) - 1);
239  aclAxis = aclAxis > 0 ? aclAxis -1 : aclAxis;
240 
241  return aclAxis;
242 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
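Worked examples of the mapping implemented above (the tensor ranks are illustrative):

// Illustrative only:
//   m_Axis == -1            -> returns 0 (Android default maps to the ACL default)
//   m_Axis != -1, rank == 2 -> aclAxis = (2 - 1) = 1, then 1 > 0 so aclAxis = 0
//   m_Axis != -1, rank == 4 -> aclAxis = (4 - 1) = 3, then 3 > 0 so aclAxis = 2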

◆ ComputeSplitAxis()

std::set<unsigned int> armnn::ComputeSplitAxis ( const armnn::SplitterDescriptor desc,
const TensorShape input 
)
inline

Definition at line 244 of file ArmComputeUtils.hpp.

References ViewsDescriptor::GetNumDimensions(), ViewsDescriptor::GetNumViews(), and ViewsDescriptor::GetViewSizes().

Referenced by ClSplitterWorkload::ClSplitterWorkload(), SplitterLayer::CreateWorkload(), ClLayerSupport::IsSplitterSupported(), NeonLayerSupport::IsSplitterSupported(), and NeonSplitterWorkload::NeonSplitterWorkload().

245 {
246  unsigned int numSplit = desc.GetNumViews();
247  unsigned int numDimensions = desc.GetNumDimensions();
248  std::set<unsigned int> splitAxis;
249 
250  for (unsigned int i = 0; i < numSplit; ++i)
251  {
252  for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
253  {
254  if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
255  {
256  splitAxis.insert(dimIdx);
257  }
258  }
259  }
260  return splitAxis;
261 }
uint32_t GetNumDimensions() const
Get the number of dimensions.
uint32_t GetNumViews() const
Get the number of views.
const uint32_t * GetViewSizes(uint32_t idx) const
Get the view sizes at the int value idx.
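A minimal sketch (the view sizes and input shape are illustrative assumptions, not from the ArmNN sources) showing how the split axis is inferred by comparing each view's sizes against the input shape:

// Illustrative only: split a [4, 6] tensor into two [4, 3] views.
armnn::SplitterDescriptor splitDesc(2, 2);      // 2 views, 2 dimensions
for (unsigned int view = 0; view < 2; ++view)
{
    splitDesc.SetViewSize(view, 0, 4);          // dimension 0 matches the input size
    splitDesc.SetViewSize(view, 1, 3);          // dimension 1 differs -> split axis
}

armnn::TensorShape inputShape({4, 6});
std::set<unsigned int> axes = armnn::ComputeSplitAxis(splitDesc, inputShape);  // expected: {1}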

◆ Concatenate()

void Concatenate ( const ConcatQueueDescriptor data,
std::vector< ITensorHandle *>  inputs,
std::vector< ITensorHandle *>  outputs 
)

Definition at line 14 of file Concatenate.cpp.

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), GetTensorInfo(), ConcatQueueDescriptor::ViewOrigin::m_Origin, ConcatQueueDescriptor::m_ViewOrigins, and MaxNumOfTensorDimensions.

Referenced by RefConcatWorkload::ExecuteAsync().

17 {
18  const TensorInfo& outputInfo0 = GetTensorInfo(outputs[0]);
19 
20  std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, outputs[0]->Map());
21  Encoder<float>& encoder = *encoderPtr;
22 
23  for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
24  {
25  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
26 
27  unsigned int indexRemainder = index;
28  unsigned int dimensionStride = outputInfo0.GetNumElements();
29 
30  for (unsigned int i = 0; i < outputInfo0.GetNumDimensions(); i++)
31  {
32  dimensionStride /= outputInfo0.GetShape()[i];
33  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
34  indexRemainder -= indices[i] * dimensionStride;
35  }
36 
37  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
38  {
39  ConcatQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
40 
41  //Split view extents are defined by the size of (the corresponding) input tensor.
42  const TensorInfo& inputInfo = GetTensorInfo(inputs[viewIdx]);
43  ARMNN_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
44 
45  // Check all dimensions to see if this element is inside the given input view.
46  bool insideView = true;
47  for (unsigned int i = 0; i < inputInfo.GetNumDimensions(); i++)
48  {
49  if (indices[i] < view.m_Origin[i])
50  {
51  insideView = false;
52  }
53  if (indices[i] >= view.m_Origin[i] + inputInfo.GetShape()[i])
54  {
55  insideView = false;
56  }
57  }
58 
59  if (insideView)
60  {
61  std::unique_ptr<Decoder<float>> decoderPtr =
62  MakeDecoder<float>(inputInfo,inputs[viewIdx]->Map());
63  Decoder<float>& decoder = *decoderPtr;
64  unsigned int inIndex = 0;
65  unsigned int dimensionStride = 1;
66 
67  for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;)
68  {
69  inIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
70  dimensionStride *= inputInfo.GetShape()[i];
71  }
72  decoder += inIndex;
73  encoder.Set(decoder.Get());
74 
75  //What should we do if input views overlap on the output tensor?
76  //We could error, take the average, or something else...
77  //For now just stop after finding first view (input) that matches.
78  break;
79  }
80  }
81  ++encoder;
82  }
83 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout, const armnn::DataType dataType)
Definition: TensorUtils.cpp:38
constexpr unsigned int MaxNumOfTensorDimensions
Definition: Types.hpp:31

◆ ConditionalThrow() [1/2]

void armnn::ConditionalThrow ( bool  condition,
const std::string &  message 
)

Definition at line 165 of file Exceptions.hpp.

166 {
167  if (!condition)
168  {
169  throw ExceptionType(message);
170  }
171 }

◆ ConditionalThrow() [2/2]

void armnn::ConditionalThrow ( bool  condition)

Definition at line 174 of file Exceptions.hpp.

175 {
176  if (!condition)
177  {
178  throw ExceptionType();
179  }
180 }

◆ ConditionalThrowIfNotEqual()

void armnn::ConditionalThrowIfNotEqual ( const std::string &  message,
const ComparedType &  leftHandSide,
const ComparedType &  rightHandSide 
)

ComparedType must support: operator==(const ComparedType&) operator<<(ostream&, const ComparedType&)

Definition at line 189 of file Exceptions.hpp.

192 {
193  if (!(leftHandSide == rightHandSide))
194  {
195  std::stringstream ss;
196  ss << message << " : " << leftHandSide << " != " << rightHandSide;
197  throw ExceptionType(ss.str());
198  }
199 }
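A minimal usage sketch; the exception type and the compared values here are illustrative assumptions:

// Illustrative only: throw InvalidArgumentException if the two ranks differ.
unsigned int inputRank  = 4;
unsigned int outputRank = 3;
armnn::ConditionalThrowIfNotEqual<armnn::InvalidArgumentException>(
    "Input and output ranks must match", inputRank, outputRank);
// Throws with the message: "Input and output ranks must match : 4 != 3"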

◆ ConfigureDetailsObject()

void armnn::ConfigureDetailsObject ( JsonChildObject detailsObject,
std::string  layerDetailsStr 
)

Definition at line 295 of file Profiling.cpp.

References ExecObjectDesc, JsonChildObject::SetAndParseDetails(), and JsonChildObject::SetType().

297 {
298  detailsObject.SetType(JsonObjectType::ExecObjectDesc);
299  detailsObject.SetAndParseDetails(layerDetailsStr);
300 
301 }

◆ ConfigureLogging()

void ConfigureLogging ( bool  printToStandardOutput,
bool  printToDebugOutput,
LogSeverity  severity 
)

Configures the logging behaviour of the ARMNN library.

printToStandardOutput: Set to true if log messages should be printed to the standard output.
printToDebugOutput: Set to true if log messages should be printed to a platform-specific debug output (where supported).
severity: All log messages that are at this severity level or higher will be printed; others will be ignored.

Examples:
AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, and SimpleSample.cpp.

Definition at line 18 of file Utils.cpp.

References SetAllLoggingSinks(), SetLogFilter(), and Trace.

Referenced by ConfigureLoggingTest(), ProfilingServiceRuntimeHelper::ForceTransitionToState(), armnn::test::InferenceTestMain(), and main().

19 {
20  SetAllLoggingSinks(printToStandardOutput, printToDebugOutput, false);
21  SetLogFilter(severity);
22 }
void SetAllLoggingSinks(bool standardOut, bool debugOut, bool coloured)
Definition: Logging.cpp:191
void SetLogFilter(LogSeverity level)
Definition: Logging.cpp:73
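A typical call from an application's entry point might look like the following; the chosen severity is just an example:

// Illustrative only: print Info and above to standard output, skip the debug sink.
armnn::ConfigureLogging(true, false, armnn::LogSeverity::Info);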

◆ ConfigureTuner()

void armnn::ConfigureTuner ( arm_compute::CLTuner &  tuner,
TuningLevel  level 
)

Definition at line 115 of file ClBackendContext.cpp.

References ARMNN_LOG, Exhaustive, info, None, Normal, and Rapid.

Referenced by ClBackendContext::ClBackendContext().

116 {
117  tuner.set_tune_new_kernels(true); // Turn on tuning initially.
118 
119  switch (level)
120  {
121  case TuningLevel::Rapid:
122  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)";
123  tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID);
124  break;
125  case TuningLevel::Normal:
126  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)";
127  tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL);
128  break;
129  case TuningLevel::Exhaustive:
130  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)";
131  tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE);
132  break;
133  case TuningLevel::None:
134  default:
135  tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode.
136  break;
137  }
138 }
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205

◆ Convert1HWOTensorInfoToAcl()

std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl ( const TensorInfo weightInfo,
const TensorInfo inputInfo,
const DataLayout  dataLayout 
)

Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. Returns a tuple of the converted weights tensor info and the depth multiplier.

Definition at line 170 of file WorkloadUtils.cpp.

References GetDataLayoutName(), TensorInfo::GetShape(), NCHW, NHWC, and armnnUtils::Permuted().

Referenced by GatherTensorHandlePairs().

173 {
174  unsigned int aclDepthMultiplier = 1;
175  TensorInfo weightsPermuted;
176  if (dataLayout == armnn::DataLayout::NHWC)
177  {
178  // No permutation required. Input and weights data layouts are the same.
179  aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[3];
180  weightsPermuted = weightInfo;
181  }
182 
183  else if (dataLayout == armnn::DataLayout::NCHW)
184  {
185  // Weights permutation required. Weights [N,H,W,C] and input [N,C,H,W] data layouts are different.
186  // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
187  aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[1];
188  PermutationVector permutationVector{ 0, 2, 3, 1 };
189  weightsPermuted = armnnUtils::Permuted(weightInfo, permutationVector);
190  }
191  else
192  {
193  throw InvalidArgumentException(fmt::format("Unknown data layout for tensor info conversion: {}",
194  GetDataLayoutName(dataLayout)));
195  }
196 
197  return std::make_tuple(weightsPermuted, aclDepthMultiplier);
198 }
constexpr const char * GetDataLayoutName(DataLayout dataLayout)
Definition: TypesUtils.hpp:222
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:98
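A minimal sketch of the NCHW case; the concrete shapes are illustrative assumptions:

// Illustrative only: depthwise weights [1, H, W, I*M] = [1, 3, 3, 8], NCHW input with 2 channels.
armnn::TensorInfo weightInfo({1, 3, 3, 8}, armnn::DataType::Float32);
armnn::TensorInfo inputInfo({1, 2, 5, 5}, armnn::DataType::Float32);   // [N, C, H, W]

auto result = armnn::Convert1HWOTensorInfoToAcl(weightInfo, inputInfo, armnn::DataLayout::NCHW);
// std::get<0>(result) has shape [1, 8, 3, 3]  ([1, I*M, H, W])
// std::get<1>(result) == 4                    (depth multiplier = 8 / 2)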

◆ Convert1HWOTensorToAcl()

std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl ( const ConstTensorHandle weightTensor,
const TensorInfo inputInfo,
const DataLayout  dataLayout,
void *  permuteBuffer 
)

Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a ConstTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library.

Parameters
weightTensor- ConstTensorHandle of weights tensor
inputInfo- TensorInfo of input tensor
dataLayout- DataLayout of the input tensor
permuteBuffer- Pointer to memory with the size of tensor. Used for the permutation
Returns
A tuple of the transformed weights ConstTensor and the depthwise multiplier

Definition at line 139 of file WorkloadUtils.cpp.

References GetDataLayoutName(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, and PermuteTensor().

Referenced by GatherTensorHandlePairs().

143 {
144  TensorInfo weightsInfo = weightTensor->GetTensorInfo();
145  unsigned int depthMultiplier = 1;
146  PermutationVector permutationVector{};
147  if (dataLayout == armnn::DataLayout::NHWC)
148  {
149  // No permutation required. Data layouts are the same.
150 
151  depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[3];
152  }
153  else if (dataLayout == armnn::DataLayout::NCHW)
154  {
155  // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
156  depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[1];
157  permutationVector = { 0, 2, 3, 1 };
158  }
159  else
160  {
161  throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
162  GetDataLayoutName(dataLayout)));
163  }
164 
165  ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
166 
167  return std::make_tuple(weightsPermuted, depthMultiplier);
168 }
armnn::ConstTensor PermuteTensor(const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
constexpr const char * GetDataLayoutName(DataLayout dataLayout)
Definition: TypesUtils.hpp:222

◆ Convert1HWOtoMIHW()

std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW ( const ConstTensorHandle weightTensor,
const TensorInfo inputInfo,
const DataLayout dataLayout,
void *  permuteBuffer 
)

Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W].

Parameters
weightTensor - ConstTensorHandle of the weight tensor that should be converted
inputInfo - TensorInfo of the corresponding input tensor
dataLayout - DataLayout of the input tensor, e.g. NHWC or NCHW
permuteBuffer - Memory location with the same size as the weight tensor, to which the converted data is written
Returns
A tuple of the converted weight ConstTensor and the depth multiplier

Definition at line 201 of file WorkloadUtils.cpp.

References DataLayoutIndexed::GetChannelsIndex(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), TensorInfo::HasPerAxisQuantization(), PermuteTensor(), and TensorInfo::SetShape().

Referenced by GatherTensorHandlePairs().

205 {
206  TensorInfo weightsInfo = weightTensor->GetTensorInfo();
207 
208  if (weightsInfo.HasPerAxisQuantization())
209  {
210  throw InvalidArgumentException("Can't convert tensor from [1,H,W,Cout] to [M,Cin,H,W] when per channel "
211  "quantization is applied.");
212  }
213 
214  // Reshape weights [ 1, H, W, I*M ] --> [ H, W, I, M ]
215  auto weightsShape = weightsInfo.GetShape();
216  auto channelIndex = armnnUtils::DataLayoutIndexed(dataLayout).GetChannelsIndex();
217  unsigned int depthMultiplier = weightsShape[3] / inputInfo.GetShape()[channelIndex];
218  weightsInfo.SetShape({ weightsShape[1],
219  weightsShape[2],
220  inputInfo.GetShape()[channelIndex],
221  depthMultiplier});
222 
223  // Permute [ H, W, I, M ] --> [ M, I, H, W ]
224  PermutationVector permutationVector = { 2, 3, 1, 0 };
225  ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
226 
227  return std::make_tuple(weightsPermuted, depthMultiplier);
228 }
armnn::ConstTensor PermuteTensor(const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
unsigned int GetChannelsIndex() const
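
As a worked example (not from the ArmNN sources): a [1,3,3,16] weight tensor paired with an 8-channel input is first reshaped to [3,3,8,2], i.e. [H,W,I,M] with a depth multiplier of 16 / 8 = 2, and then permuted with { 2, 3, 1, 0 } to give [2,8,3,3], i.e. [M,I,H,W].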

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [1/2]

arm_compute::ActivationLayerInfo armnn::ConvertActivationDescriptorToAclActivationLayerInfo ( const ActivationDescriptor actDesc)
inline

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [2/2]

arm_compute::ActivationLayerInfo armnn::ConvertActivationDescriptorToAclActivationLayerInfo ( const ActivationDescriptor activationDescPtr)
inline

Definition at line 92 of file ArmComputeUtils.hpp.

References ConvertActivationDescriptorToAclActivationLayerInfo().

93 {
94  if (activationDescPtr != nullptr)
95  {
96  return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
97  *activationDescPtr));
98  }
99  return arm_compute::ActivationLayerInfo();
100 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)
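
A minimal usage sketch (not from the ArmNN sources), assuming an ActivationDescriptor configured for BoundedReLu with an upper bound of 6:

 armnn::ActivationDescriptor actDesc;
 actDesc.m_Function = armnn::ActivationFunction::BoundedReLu;
 actDesc.m_A = 6.0f; // upper bound
 actDesc.m_B = 0.0f; // lower bound

 arm_compute::ActivationLayerInfo aclActivation =
     armnn::ConvertActivationDescriptorToAclActivationLayerInfo(&actDesc);
 // Expected to map to ActivationFunction::LU_BOUNDED_RELU with a() == 6.0f.
 // Passing nullptr instead returns a default-constructed (disabled) ActivationLayerInfo.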

◆ ConvertActivationFunctionToAclActivationFunction()

arm_compute::ActivationLayerInfo::ActivationFunction armnn::ConvertActivationFunctionToAclActivationFunction ( ActivationFunction  armnnFunction)
inline

Definition at line 61 of file ArmComputeUtils.hpp.

References Abs, BoundedReLu, Elu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by ConvertActivationDescriptorToAclActivationLayerInfo().

62 {
63  using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction;
64 
65  switch (armnnFunction)
66  {
67  case ActivationFunction::Linear: return AclActivationFunction::LINEAR;
68  // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function.
69  case ActivationFunction::Sigmoid: return AclActivationFunction::LOGISTIC;
70  case ActivationFunction::ReLu: return AclActivationFunction::RELU;
71  case ActivationFunction::BoundedReLu: return AclActivationFunction::LU_BOUNDED_RELU;
72  case ActivationFunction::SoftReLu: return AclActivationFunction::SOFT_RELU;
73  case ActivationFunction::LeakyReLu: return AclActivationFunction::LEAKY_RELU;
74  case ActivationFunction::Abs: return AclActivationFunction::ABS;
75  case ActivationFunction::Sqrt: return AclActivationFunction::SQRT;
76  case ActivationFunction::Square: return AclActivationFunction::SQUARE;
77  case ActivationFunction::TanH: return AclActivationFunction::TANH;
78  case ActivationFunction::Elu: return AclActivationFunction::ELU;
79  case ActivationFunction::HardSwish: return AclActivationFunction::HARD_SWISH;
80  default: throw InvalidArgumentException("Unsupported activation function");
81  }
82 }
ActivationFunction
Definition: Types.hpp:86

◆ ConvertAdditionalInfoToAclActivationLayerInfo()

arm_compute::ActivationLayerInfo armnn::ConvertAdditionalInfoToAclActivationLayerInfo ( const QueueDescriptor queueDescriptor)
inline

Definition at line 103 of file ArmComputeUtils.hpp.

References ConvertActivationDescriptorToAclActivationLayerInfo(), and QueueDescriptor::GetAdditionalInformation().

Referenced by ClAdditionWorkload::ClAdditionWorkload(), ClDivisionWorkload::ClDivisionWorkload(), ClFullyConnectedWorkload::ClFullyConnectedWorkload(), ClMultiplicationWorkload::ClMultiplicationWorkload(), ClSubtractionWorkload::ClSubtractionWorkload(), ComputeConv3DInfo(), NeonAdditionWorkload::NeonAdditionWorkload(), NeonDivisionWorkload::NeonDivisionWorkload(), NeonMultiplicationWorkload::NeonMultiplicationWorkload(), and NeonSubtractionWorkload::NeonSubtractionWorkload().

104 {
105  const ActivationDescriptor* activationDescPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
106 
107  if (activationDescPtr != nullptr)
108  {
109  return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
110  *activationDescPtr));
111  }
112  return arm_compute::ActivationLayerInfo();
113 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)

◆ ConvertBf16ToFp32Weight()

LayerT* armnn::ConvertBf16ToFp32Weight ( Layer l)

Definition at line 631 of file Network.cpp.

References BFloat16, FloatingPointConverter::ConvertBFloat16ToFloat32(), Convolution2d, Float32, FullyConnected, TensorInfo::GetDataType(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), and info.

632 {
633  LayerT* layer = PolymorphicDowncast<LayerT*>(l);
634  if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
635  && layer->m_Weight)
636  {
637  const TensorInfo& info = layer->m_Weight->GetTensorInfo();
638 
639  if (info.GetDataType() == DataType::BFloat16)
640  {
641  std::vector<float> newValues(info.GetNumElements());
642 
643  FloatingPointConverter::ConvertBFloat16ToFloat32(
644  layer->m_Weight->template GetConstTensor<armnn::BFloat16>(), info.GetNumElements(), newValues.data());
645 
646  TensorInfo newInfo(info.GetShape(), DataType::Float32);
647  ConstTensor newInput(newInfo, newValues);
648  layer->m_Weight.reset(new ScopedTensorHandle(newInput));
649  }
650  }
651  return layer;
652 }
static void ConvertBFloat16ToFloat32(const void *srcBFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.

◆ ConvertComparisonOperationToAcl()

arm_compute::ComparisonOperation armnn::ConvertComparisonOperationToAcl ( const ComparisonDescriptor descriptor)
inline

Definition at line 139 of file ArmComputeUtils.hpp.

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, ComparisonDescriptor::m_Operation, and NotEqual.

Referenced by ClComparisonWorkload::ClComparisonWorkload(), and NeonComparisonWorkload::NeonComparisonWorkload().

140 {
141  switch (descriptor.m_Operation)
142  {
143  case ComparisonOperation::Greater: return arm_compute::ComparisonOperation::Greater;
144  case ComparisonOperation::GreaterOrEqual: return arm_compute::ComparisonOperation::GreaterEqual;
145  case ComparisonOperation::Less: return arm_compute::ComparisonOperation::Less;
146  case ComparisonOperation::LessOrEqual: return arm_compute::ComparisonOperation::LessEqual;
147  case ComparisonOperation::Equal: return arm_compute::ComparisonOperation::Equal;
148  case ComparisonOperation::NotEqual: return arm_compute::ComparisonOperation::NotEqual;
149  default: throw InvalidArgumentException("Unsupported comparison function");
150  }
151 }

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [1/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo ( const FullyConnectedDescriptor fullyConnectedDesc,
const ActivationDescriptor activationDesc 
)
inline

Definition at line 192 of file ArmComputeUtils.hpp.

References ConvertActivationDescriptorToAclActivationLayerInfo(), and FullyConnectedDescriptor::m_TransposeWeightMatrix.

Referenced by ClFullyConnectedWorkload::ClFullyConnectedWorkload().

194 {
195  arm_compute::FullyConnectedLayerInfo fc_info;
196  fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
197  fc_info.activation_info = ConvertActivationDescriptorToAclActivationLayerInfo(activationDesc);
198  return fc_info;
199 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [2/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo ( const FullyConnectedDescriptor fullyConnectedDesc,
arm_compute::ActivationLayerInfo  activationLayerInfo 
)
inline

Definition at line 202 of file ArmComputeUtils.hpp.

References FullyConnectedDescriptor::m_TransposeWeightMatrix.

204 {
205  arm_compute::FullyConnectedLayerInfo fc_info;
206  fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
207  fc_info.activation_info = activationLayerInfo;
208  return fc_info;
209 }

◆ ConvertLogSeverity()

constexpr LogSeverity armnn::ConvertLogSeverity ( BoostLogSeverityMapping  severity)

Definition at line 199 of file Logging.hpp.

200 {
201  return static_cast<LogSeverity>(severity);
202 }
LogSeverity
Definition: Utils.hpp:14

◆ ConvertLstmActivationFuncToAclLayerInfo()

arm_compute::ActivationLayerInfo armnn::ConvertLstmActivationFuncToAclLayerInfo ( uint32_t  activationFunction)
inline

Definition at line 116 of file ArmComputeUtils.hpp.

117 {
118  // For preparing the object for the class ActivationLayerInfo, we need to consider 5 situations.
119  switch (activationFunction)
120  {
121  case 0:
122  return arm_compute::ActivationLayerInfo(); // no activation, do nothing
123  case 1:
124  return arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
125  case 3:
126  return arm_compute::ActivationLayerInfo(
127  arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
128  case 4:
129  return arm_compute::ActivationLayerInfo(
130  arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
131  case 6:
132  return arm_compute::ActivationLayerInfo(
133  arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
134  default:
135  throw armnn::Exception("Wrong Type of Activation Function!");
136  }
137 }
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
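
In other words, the accepted integer codes are 0 (no activation), 1 (ReLU), 3 (ReLU clipped at 6.0), 4 (TanH with scale 1.0) and 6 (Sigmoid); any other value, including 2 and 5, throws an armnn::Exception.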

◆ ConvertMaskToACLFormat()

int32_t ConvertMaskToACLFormat ( int32_t  mask,
int32_t  numDim 
)

Definition at line 286 of file WorkloadUtils.cpp.

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload(), GatherTensorHandlePairs(), and NeonStridedSliceWorkload::NeonStridedSliceWorkload().

287 {
288  int32_t reversedMask = 0;
289  for (unsigned int i = 0; i < armnn::numeric_cast<unsigned int>(numDim); ++i)
290  {
291  // Check if bit set in mask for each dimension
292  int32_t bit = (mask & 1 << i) != 0;
293  // Increment the new mask with the bits reversed
294  reversedMask += (bit << std::max(numDim-(armnn::numeric_cast<int>(i)+1), 0));
295  }
296 
297  return reversedMask;
298 }
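
A worked example (not from the ArmNN sources): for a 4-dimensional strided slice, a mask with bits 0 and 1 set is mirrored so that bit i ends up at position numDim - 1 - i:

 int32_t aclMask = armnn::ConvertMaskToACLFormat(0b0011, 4);
 // aclMask == 0b1100 (12); the mask is bit-reversed across the numDim dimensions,
 // matching the dimension ordering expected by the Compute Library.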

◆ ConvertNormalizationAlgorithmChannelToAclNormType()

arm_compute::NormType armnn::ConvertNormalizationAlgorithmChannelToAclNormType ( NormalizationAlgorithmChannel  channelType)
inline

Definition at line 180 of file ArmComputeUtils.hpp.

References Across, and Within.

181 {
182  using arm_compute::NormType;
183  switch (channelType)
184  {
185  case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP;
186  case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D;
187  default: throw InvalidArgumentException("Unsupported normalization algorithm channel type");
188  }
189 }

◆ ConvertOutputShapeRoundingToAclDimensionRoundingType()

arm_compute::DimensionRoundingType armnn::ConvertOutputShapeRoundingToAclDimensionRoundingType ( OutputShapeRounding  rounding)
inline

Definition at line 166 of file ArmComputeUtils.hpp.

References Ceiling, and Floor.

168 {
169  using arm_compute::DimensionRoundingType;
170 
171  switch (rounding)
172  {
173  case OutputShapeRounding::Ceiling: return DimensionRoundingType::CEIL;
174  case OutputShapeRounding::Floor: return DimensionRoundingType::FLOOR;
175  default: throw InvalidArgumentException("Unsupported Output Shape Rounding type");
176  }
177 }

◆ ConvertPaddingModeToAcl()

arm_compute::PaddingMode armnn::ConvertPaddingModeToAcl ( const PaddingMode paddingMode)
inline

Definition at line 327 of file ArmComputeUtils.hpp.

References Constant, Reflect, and Symmetric.

328 {
329  switch (paddingMode)
330  {
331  case PaddingMode::Constant: return arm_compute::PaddingMode::CONSTANT;
332  case PaddingMode::Reflect: return arm_compute::PaddingMode::REFLECT;
333  case PaddingMode::Symmetric: return arm_compute::PaddingMode::SYMMETRIC;
334  default: throw InvalidArgumentException("Unsupported Padding Mode");
335  }
336 }

◆ ConvertPoolingAlgorithmToAclPoolingType()

arm_compute::PoolingType armnn::ConvertPoolingAlgorithmToAclPoolingType ( PoolingAlgorithm  poolingAlgorithm)
inline

Definition at line 153 of file ArmComputeUtils.hpp.

References Average, L2, and Max.

154 {
155  using arm_compute::PoolingType;
156 
157  switch (poolingAlgorithm)
158  {
159  case PoolingAlgorithm::Max: return PoolingType::MAX;
160  case PoolingAlgorithm::Average: return PoolingType::AVG;
161  case PoolingAlgorithm::L2: return PoolingType::L2;
162  default: throw InvalidArgumentException("Unsupported pooling algorithm");
163  }
164 }

◆ ConvertReductionOperationToAcl()

arm_compute::ReductionOperation armnn::ConvertReductionOperationToAcl ( const ReduceDescriptor descriptor)
inline

Definition at line 338 of file ArmComputeUtils.hpp.

References ReduceDescriptor::m_ReduceOperation, Max, Mean, Min, Prod, and Sum.

339 {
340  switch (descriptor.m_ReduceOperation)
341  {
342  case ReduceOperation::Sum: return arm_compute::ReductionOperation::SUM;
343  case ReduceOperation::Mean: return arm_compute::ReductionOperation::MEAN_SUM;
344  case ReduceOperation::Max: return arm_compute::ReductionOperation::MAX;
345  case ReduceOperation::Min: return arm_compute::ReductionOperation::MIN;
346  case ReduceOperation::Prod: return arm_compute::ReductionOperation::PROD;
347  default: throw InvalidArgumentException("Unsupported Reduction operation");
348  }
349 }

◆ ConvertResizeMethodToAclInterpolationPolicy()

arm_compute::InterpolationPolicy armnn::ConvertResizeMethodToAclInterpolationPolicy ( ResizeMethod  resizeMethod)
inline

Definition at line 211 of file ArmComputeUtils.hpp.

References Bilinear, and NearestNeighbor.

212 {
213  switch (resizeMethod)
214  {
215  case ResizeMethod::Bilinear:
216  return arm_compute::InterpolationPolicy::BILINEAR;
217  case ResizeMethod::NearestNeighbor:
218  return arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR;
219  default:
220  throw InvalidArgumentException("Unsupported resize method");
221  }
222 }

◆ ConvertWeightTensorFromArmnnToAcl()

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl ( const ConstTensorHandle weightTensor,
DataLayout  dataLayout,
void *  permuteBuffer 
)

Definition at line 230 of file WorkloadUtils.cpp.

References ARMNN_ASSERT_MSG, Float16, Float32, BaseTensor< MemoryType >::GetDataType(), BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, PermuteTensor(), QAsymmS8, QAsymmU8, QSymmS8, and ReshapeWeightsForAcl().

Referenced by GatherTensorHandlePairs().

233 {
234  ARMNN_ASSERT_MSG(weightTensor, "Invalid input tensor");
235  ARMNN_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");
236 
237  auto multiplier = weightTensor->GetTensorInfo().GetShape()[0];
238  auto inputChannels = weightTensor->GetTensorInfo().GetShape()[1];
239 
240  // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
241  // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
242 
243  // 1. Permute the weights if necessary
244  // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
245  // starting from the current shape of [ M, I, H, W ]
246  // If no permutation is necessary, leave the permutation vector empty
247  PermutationVector permutationVector{};
248  if (dataLayout == DataLayout::NHWC)
249  {
250  // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
251  permutationVector = { 3, 2, 0, 1 };
252  }
253  ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
254 
255  // Shuffle the weights data to obtain the channel order needed by ACL
256  if (multiplier > 1 && inputChannels > 1 && dataLayout == DataLayout::NCHW)
257  {
258  switch (weightPermuted.GetDataType())
259  {
260  case DataType::Float32:
261  weightPermuted = ReorderWeightChannelsForAcl<float>(weightPermuted, dataLayout, permuteBuffer);
262  break;
263  case DataType::Float16:
264  weightPermuted =
265  ReorderWeightChannelsForAcl<half_float::half>(weightPermuted, dataLayout, permuteBuffer);
266  break;
267  case DataType::QAsymmS8:
268  case DataType::QAsymmU8:
269  weightPermuted = ReorderWeightChannelsForAcl<uint8_t>(weightPermuted, dataLayout, permuteBuffer);
270  break;
271  case DataType::QSymmS8:
272  weightPermuted = ReorderWeightChannelsForAcl<int8_t>(weightPermuted, dataLayout, permuteBuffer);
273  break;
274  default:
275  break;
276  }
277  }
278 
279  // 2. Reshape the weights
280  ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout);
281 
282  // 3. Return both the tensor and the allocated storage to ensure that the data stays alive
283  return weightPermuted;
284 }
armnn::ConstTensor PermuteTensor(const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
void ReshapeWeightsForAcl(TensorInfo &weightInfo, DataLayout dataLayout)

◆ ConvertWeightTensorInfoFromArmnnToAcl()

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl ( const TensorInfo weightInfo,
DataLayout  dataLayout 
)

Definition at line 115 of file WorkloadUtils.cpp.

References NHWC, armnnUtils::Permuted(), and ReshapeWeightsForAcl().

Referenced by GatherTensorHandlePairs().

116 {
117  // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
118  // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
119 
120  // 1. Permute the weights if necessary
121  // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
122  // starting from the current shape of [ M, I, H, W ]
123  TensorInfo weightPermutedInfo(weightInfo);
124  if (dataLayout == DataLayout::NHWC)
125  {
126  // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
127  PermutationVector permutationVector{ 3, 2, 0, 1 };
128  weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector);
129  }
130 
131  // 2. Reshape the weights
132  ReshapeWeightsForAcl(weightPermutedInfo, dataLayout);
133 
134  // 3. Return the permuted weight info
135  return weightPermutedInfo;
136 }
void ReshapeWeightsForAcl(TensorInfo &weightInfo, DataLayout dataLayout)
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:98

◆ Convolve()

void Convolve ( const TensorShape rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape rFilterShape,
Decoder< float > &  rFilterDecoder,
bool  biasEnabled,
Decoder< float > *  pBiasDecoder,
DataLayout  dataLayout,
unsigned int  paddingTop,
unsigned int  paddingLeft,
unsigned int  xStride,
unsigned int  yStride,
unsigned int  xDilation,
unsigned int  yDilation,
bool  depthwise 
)

Definition at line 71 of file ConvImpl.cpp.

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NHWC, and Encoder< IType >::Set().

Referenced by RefDepthwiseConvolution2dWorkload::ExecuteAsync(), and RefConvolution2dWorkload::ExecuteAsync().

87 {
88  if (biasEnabled && !pBiasDecoder)
89  {
90  throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
91  }
92  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
93 
94  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
95  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
96  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
97 
98  // Weights layout:
99  // Conv2d: [O,H,W,I]
100  // Depthwise: [1,H,W,O]
101  const unsigned int inputChannels = rInputShape[channelsIndex];
102  const unsigned int outputChannels = rOutputShape[channelsIndex];
103  const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
104 
105  const unsigned int batchSize = rOutputShape[0];
106  const unsigned int outputHeight = rOutputShape[heightIndex];
107  const unsigned int outputWidth = rOutputShape[widthIndex];
108  const unsigned int inputHeight = rInputShape[heightIndex];
109  const unsigned int inputWidth = rInputShape[widthIndex];
110 
111  const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
112  const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
113 
114  const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
115  const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);
116 
117  const TensorShape biasShape{outputChannels};
118  const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
119 
120  for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
121  {
122  for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
123  {
124  for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
125  {
126  for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
127  {
128  // This loop goes over each output element.
129  float sum = 0.0f;
130 
131  // For depthwise, each output channel corresponds to exactly one input channel.
132  // For normal, must loop over each input channel.
133  for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
134  {
135  for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
136  {
137  for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
138  {
139  // This loop goes over each input element for each output element.
140  unsigned int filterIndex = 0;
141 
142  // Since dimensionality of kernel depends on depthwiseness, so does index.
143  if (depthwise)
144  {
145  cInput = cOutput / depthMultiplier;
146  // filterDepth = outputChannels;
147  filterIndex = xFilter * outputChannels + cOutput +
148  yFilter * filterWidth * outputChannels;
149  }
150  else
151  {
152  // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
153  // performance regression.
154  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
155  {
156  filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
157  yFilter * filterWidth * inputChannels +
158  xFilter * inputChannels +
159  cInput;
160  }
161  else
162  {
163  filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
164  cInput * filterWidth * filterHeight +
165  yFilter * filterWidth +
166  xFilter;
167  }
168  }
169 
170  unsigned int yInput = yOutput * yStride + yFilter * yDilation;
171  unsigned int xInput = xOutput * xStride + xFilter * xDilation;
172 
173  float inputValue;
174 
175  // Check if we're in the padding.
176  if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
177  xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
178  {
179  inputValue = 0.0f;
180  }
181  else
182  {
183  unsigned int inputIndex = 0;
184 
185  // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
186  // performance regression.
187  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
188  {
189  inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
190  (yInput - paddingTop) * inputWidth * inputChannels +
191  (xInput - paddingLeft) * inputChannels +
192  cInput;
193  }
194  else
195  {
196  inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
197  inputWidth * inputHeight * cInput +
198  inputWidth * (yInput - paddingTop) +
199  xInput - paddingLeft;
200  }
201  inputValue = inputVec[inputIndex];
202  }
203 
204  sum += filterVec[filterIndex] * inputValue;
205  }
206  }
207  }
208 
209  if (biasEnabled)
210  {
211  sum += biasVec[cOutput];
212  }
213 
214  unsigned int outIdx;
215  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
216  {
217  outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
218  yOutput * outputWidth * outputChannels +
219  xOutput * outputChannels +
220  cOutput;
221  }
222  else
223  {
224  outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
225  cOutput * outputHeight * outputWidth +
226  yOutput * outputWidth +
227  xOutput;
228  }
229 
230  rOutputEncoder[outIdx];
231  rOutputEncoder.Set(sum);
232  }
233  }
234  }
235  }
236 }
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
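
In the depthwise case each output channel maps to exactly one input channel: with 8 input channels and 16 output channels the depth multiplier is 2, and output channel cOutput reads from input channel cOutput / 2 while the [1,H,W,O] filter is indexed directly by cOutput.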

◆ Convolve3d()

void Convolve3d ( const TensorShape rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape rFilterShape,
Decoder< float > &  rFilterDecoder,
bool  biasEnabled,
Decoder< float > *  pBiasDecoder,
DataLayout  dataLayout,
unsigned int  paddingTop,
unsigned int  paddingLeft,
unsigned int  paddingFront,
unsigned int  xStride,
unsigned int  yStride,
unsigned int  zStride,
unsigned int  xDilation,
unsigned int  yDilation,
unsigned int  zDilation 
)

Definition at line 11 of file Conv3dImpl.cpp.

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetDepthIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NDHWC, and Encoder< IType >::Set().

Referenced by RefConvolution3dWorkload::ExecuteAsync().

29 {
30  if (biasEnabled && !pBiasDecoder)
31  {
32  throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
33  }
34  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
35 
36  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
37  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
38  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
39  const unsigned int depthIndex = dataLayoutIndexed.GetDepthIndex();
40 
41  const unsigned int inChannels = rInputShape[channelsIndex];
42  const unsigned int outChannels = rOutputShape[channelsIndex];
43 
44  const unsigned int batchSize = rOutputShape[0];
45  const unsigned int outputHeight = rOutputShape[heightIndex];
46  const unsigned int outputWidth = rOutputShape[widthIndex];
47  const unsigned int outputDepth = rOutputShape[depthIndex];
48  const unsigned int inputHeight = rInputShape[heightIndex];
49  const unsigned int inputWidth = rInputShape[widthIndex];
50  const unsigned int inputDepth = rInputShape[depthIndex];
51 
52  // Conv3d weights layout: [D,H,W,I,O]
53  const unsigned int filterDepth = rFilterShape[0];
54  const unsigned int filterHeight = rFilterShape[1];
55  const unsigned int filterWidth = rFilterShape[2];
56 
57  const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
58  const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape);
59 
60  const TensorShape biasShape{outChannels};
61  const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
62 
63  for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
64  {
65  for (unsigned int zOutput = 0; zOutput < outputDepth; zOutput++)
66  {
67  for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
68  {
69  for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
70  {
71  for (unsigned int cOutput = 0; cOutput < outChannels; cOutput++)
72  {
73  // This loop goes over each output element.
74  float sum = 0.0f;
75 
76  // Loop over each input channel.
77  for (unsigned int zFilter = 0; zFilter < filterDepth; zFilter++)
78  {
79  for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
80  {
81  for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
82  {
83  for (unsigned int cInput = 0; cInput < inChannels; cInput++)
84  {
85  // This loop goes over each input element for each output element.
86  unsigned int filterIndex = 0;
87 
88  // Conv3d weights layout: [D,H,W,I,O]
89  // Keep this implementation, as using DataLayoutIndexed::GetIndex
90  // causes large performance regression.
91  filterIndex = zFilter * filterHeight * filterWidth * inChannels * outChannels +
92  yFilter * filterWidth * inChannels * outChannels +
93  xFilter * inChannels * outChannels +
94  cInput * outChannels +
95  cOutput;
96 
97  unsigned int yInput = yOutput * yStride + yFilter * yDilation;
98  unsigned int xInput = xOutput * xStride + xFilter * xDilation;
99  unsigned int zInput = zOutput * zStride + zFilter * zDilation;
100 
101  float inputValue;
102 
103  // Check if we're in the padding.
104  if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
105  xInput < paddingLeft || xInput >= inputWidth + paddingLeft ||
106  zInput < paddingFront || zInput >= inputDepth + paddingFront)
107  {
108  inputValue = 0.0f;
109  }
110  else
111  {
112  unsigned int inputIndex = 0;
113 
114  // Keep this implementation, as using DataLayoutIndexed::GetIndex
115  // causes large performance regression.
116  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
117  {
118  inputIndex =
119  batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
120  (zInput-paddingFront) * inputHeight * inputWidth * inChannels +
121  (yInput-paddingTop) * inputWidth * inChannels +
122  (xInput-paddingLeft) * inChannels +
123  cInput;
124  }
125  else
126  {
127  // NCDHW DataLayout
128  inputIndex =
129  batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
130  inputDepth * inputHeight * inputWidth * cInput +
131  (zInput-paddingFront) * inputHeight * inputWidth +
132  (yInput-paddingTop) * inputWidth +
133  xInput-paddingLeft;
134  }
135 
136  inputValue = inputVec[inputIndex];
137  }
138 
139  sum += filterVec[filterIndex] * inputValue;
140  }
141  }
142  }
143  }
144 
145  if (biasEnabled)
146  {
147  sum += biasVec[cOutput];
148  }
149 
150  unsigned int outIdx;
151  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
152  {
153  outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
154  zOutput * outputHeight * outputWidth * outChannels +
155  yOutput * outputWidth * outChannels +
156  xOutput * outChannels +
157  cOutput;
158  }
159  else
160  {
161  // NCDHW DataLayout
162  outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
163  cOutput * outputDepth * outputHeight * outputWidth +
164  zOutput * outputHeight * outputWidth +
165  yOutput * outputWidth +
166  xOutput;
167  }
168 
169  rOutputEncoder[outIdx];
170  rOutputEncoder.Set(sum);
171  }
172  }
173  }
174  }
175  }
176 }
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...

◆ CopyArmComputeClTensorData()

void armnn::CopyArmComputeClTensorData ( arm_compute::CLTensor &  dstTensor,
const T *  srcData 
)

Definition at line 55 of file ClWorkloadUtils.hpp.

References ARMNN_SCOPED_PROFILING_EVENT_CL.

Referenced by ClConstantWorkload::Execute().

56 {
57  {
58  ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
59  dstTensor.map(true);
60  }
61 
62  {
63  ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
64  armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor);
65  }
66 
67  dstTensor.unmap();
68 }
#define ARMNN_SCOPED_PROFILING_EVENT_CL(name)

◆ CopyArmComputeTensorData()

void armnn::CopyArmComputeTensorData ( arm_compute::Tensor &  dstTensor,
const T *  srcData 
)

Definition at line 54 of file NeonWorkloadUtils.hpp.

Referenced by InitializeArmComputeTensorData().

55 {
56  InitialiseArmComputeTensorEmpty(dstTensor);
57  CopyArmComputeITensorData(srcData, dstTensor);
58 }

◆ CopyTensorContentsGeneric()

void armnn::CopyTensorContentsGeneric ( const ITensorHandle srcTensor,
ITensorHandle dstTensor,
CopyFunc  copy 
)

Definition at line 46 of file WorkloadUtils.hpp.

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, TensorShape::GetNumDimensions(), ITensorHandle::GetShape(), ITensorHandle::GetStrides(), IgnoreUnused(), ITensorHandle::Map(), MaxNumOfTensorDimensions, Undefined, and ITensorHandle::Unmap().

Referenced by CopyToOutputTensor(), NeonConvertBf16ToFp32Workload::Execute(), NeonConvertFp32ToBf16Workload::Execute(), NeonConvertFp16ToFp32Workload::Execute(), NeonConvertFp32ToFp16Workload::Execute(), CopyMemGenericWorkload::Execute(), CopyMemGenericWorkload::ExecuteAsync(), and LoadedNetwork::FreeWorkingMemory().

47 {
48  // For ease of understanding, names are assigned to the dimensions
49  // of the tensor as if NHWC, however this routine works with any 5D tensor
50  static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");
51 
52  TensorShape srcStrides = srcTensor->GetStrides();
53  const TensorShape& srcShape = srcTensor->GetShape();
54  const auto srcSize = srcTensor->GetStrides()[0] * srcShape[0];
55  IgnoreUnused(srcSize); // Only used for asserts
56  TensorShape dstStrides = dstTensor->GetStrides();
57  const TensorShape& dstShape = dstTensor->GetShape();
58  const auto dstSize = dstTensor->GetStrides()[0] * dstShape[0];
59  IgnoreUnused(dstSize); // Only used for asserts
60 
61  size_t srcDepth = 1;
62  size_t srcBatches = 1;
63  size_t srcHeight = 1;
64  size_t srcWidth = 1;
65  size_t srcChannels = 1;
66  AssignValues(srcShape.GetNumDimensions(),
67  0,
68  srcShape,
69  srcChannels,
70  srcWidth,
71  srcHeight,
72  srcBatches,
73  srcDepth);
74 
75  size_t srcDepthStride = 0;
76  size_t srcBatchStride = 0;
77  size_t srcHeightStride = 0;
78  size_t srcWidthStride = 0;
79  size_t srcChannelStride = 0;
80  AssignValues(srcStrides.GetNumDimensions(),
81  0,
82  srcStrides,
83  srcChannelStride,
84  srcWidthStride,
85  srcHeightStride,
86  srcBatchStride,
87  srcDepthStride);
88 
89  size_t dstDepth = 1;
90  size_t dstBatches = 1;
91  size_t dstHeight = 1;
92  size_t dstWidth = 1;
93  size_t dstChannels = 1;
94  AssignValues(dstShape.GetNumDimensions(),
95  0,
96  dstShape,
97  dstChannels,
98  dstWidth,
99  dstHeight,
100  dstBatches,
101  dstDepth);
102 
103  size_t dstDepthStride = 0;
104  size_t dstBatchStride = 0;
105  size_t dstHeightStride = 0;
106  size_t dstWidthStride = 0;
107  size_t dstChannelStride = 0;
108  AssignValues(dstStrides.GetNumDimensions(),
109  0,
110  dstStrides,
111  dstChannelStride,
112  dstWidthStride,
113  dstHeightStride,
114  dstBatchStride,
115  dstDepthStride);
116 
117  const unsigned char* srcDataStart;
118  unsigned char* dstDataStart;
119  {
120  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
121  srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
122  dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
123  }
124 
125  size_t copyLength = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
126  size_t copyWidth = std::min(srcWidth, dstWidth);
127  size_t copyHeight = std::min(srcHeight, dstHeight);
128  size_t copyBatches = std::min(srcBatches, dstBatches);
129  size_t copyDepth = std::min(srcDepth, dstDepth);
130 
131  // Coalesce inner dimensions where possible
132  // to reduce the overhead of calling copy() and to
133  // allow for memory bandwidth optimisations
134  if (copyLength == srcWidthStride &&
135  copyLength == dstWidthStride)
136  {
137  // There is no special padding between rows,
138  // and sizes are compatible, so copy whole rows
139  copyLength *= copyWidth;
140  copyWidth = 1;
141 
142  if (copyLength == srcHeightStride &&
143  copyLength == dstHeightStride)
144  {
145  // There is no special padding between batches
146  // and sizes are compatible so copy whole batches
147  copyLength *= copyHeight;
148  copyHeight = 1;
149  }
150  }
151 
152  const unsigned char* srcData = srcDataStart;
153  unsigned char* dstData = dstDataStart;
154  for (unsigned int d = 0; d < copyDepth; ++d)
155  {
156  auto srcPtrDepth = srcData;
157  auto dstPtrDepth = dstData;
158  for (unsigned int b = 0; b < copyBatches; ++b)
159  {
160  auto srcPtrBatch = srcData;
161  auto dstPtrBatch = dstData;
162  for (unsigned int h = 0; h < copyHeight; ++h)
163  {
164  auto srcPtrChannel = srcData;
165  auto dstPtrChannel = dstData;
166  for (unsigned int w = 0; w < copyWidth; ++w)
167  {
168  ARMNN_ASSERT(srcData >= srcDataStart && srcData + copyLength <= srcDataStart + srcSize);
169  ARMNN_ASSERT(dstData >= dstDataStart && dstData + copyLength <= dstDataStart + dstSize);
170  copy(dstData, srcData, copyLength);
171  dstData += dstWidthStride;
172  srcData += srcWidthStride;
173  }
174  dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
175  srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
176  }
177  dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
178  srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
179  }
180  dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
181  srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
182  }
183 
184  srcTensor->Unmap();
185  dstTensor->Unmap();
186 }
void IgnoreUnused(Ts &&...)
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
constexpr unsigned int MaxNumOfTensorDimensions
Definition: Types.hpp:31
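
A minimal usage sketch, mirroring how CopyToOutputTensor below uses this function (srcHandle and dstHandle are assumed to be valid ITensorHandle pointers with compatible shapes):

 auto copyFunc = [](void* dst, const void* src, size_t size)
 {
     memcpy(dst, src, size);
 };
 armnn::CopyTensorContentsGeneric(srcHandle, dstHandle, copyFunc);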

◆ CopyToOutputTensor()

void armnn::CopyToOutputTensor ( const Tensor outputTensor,
ITensorHandle outputTensorHandle 
)

Definition at line 1294 of file LoadedNetwork.cpp.

References CopyTensorContentsGeneric(), BaseTensor< MemoryType >::GetInfo(), and BaseTensor< MemoryType >::GetMemoryArea().

Referenced by LoadedNetwork::Execute().

1295 {
1296  auto copyFunc = [](void* dst, const void* src, size_t size)
1297  {
1298  memcpy(dst, src, size);
1299  };
1300 
1301  std::unique_ptr<ITensorHandle> tensorHandle =
1302  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1303  outputTensor.GetMemoryArea());
1304 
1305  CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
1306 }
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)

◆ CreateAclNormalizationLayerInfoForL2Normalization()

arm_compute::NormalizationLayerInfo armnn::CreateAclNormalizationLayerInfoForL2Normalization ( const armnn::TensorInfo tensorInfo,
armnn::DataLayout  dataLayout 
)
inline

Definition at line 28 of file ArmComputeUtils.hpp.

References TensorInfo::GetShape(), and NCHW.

30 {
31  unsigned int depthDimension = dataLayout == armnn::DataLayout::NCHW ? 1 : 3;
32  const unsigned int depth = tensorInfo.GetShape()[depthDimension];
33 
34  // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. This version of
35  // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose
36  // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully choosing the normalization
37  // parameters.
38  //
39  // Please refer to both the reference implementation of the normalization layer and the implementation of
40  // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below.
41 
42  // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd.
43  // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in
44  // ACL's normalization_layer_cross_map() CL function.
45  const uint32_t normSize = depth * 2u + 1u;
46 
47  // See ACL's NormalizationLayerInfo::scale_coeff() definition.
48  // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead.
49  const float alpha = 1.0f;
50 
51  // Don't offset the reduction.
52  const float kappa = 0.0f;
53 
54  // pow(reduction, -0.5) = 1 / sqrt(reduction)
55  const float beta = 0.5f;
56 
57  return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false);
58 }
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
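
A minimal sketch (not from the ArmNN sources): for an NCHW tensor with 24 channels the normalization size becomes 2 * 24 + 1 = 49, an odd window covering the full depth range as ACL requires:

 armnn::TensorInfo tensorInfo(armnn::TensorShape({ 1, 24, 32, 32 }), armnn::DataType::Float32);
 arm_compute::NormalizationLayerInfo normInfo =
     armnn::CreateAclNormalizationLayerInfoForL2Normalization(tensorInfo, armnn::DataLayout::NCHW);
 // normInfo describes a CROSS_MAP normalization with norm_size = 49,
 // alpha = 1.0, beta = 0.5 and kappa = 0.0.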

◆ CreateClContext()

flatbuffers::Offset<ClContext> armnn::CreateClContext ( flatbuffers::FlatBufferBuilder &  _fbb,
flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>>  programs = 0 
)
inline

Definition at line 57 of file ClContextSchema_generated.h.

References ClContextBuilder::add_programs(), and ClContextBuilder::Finish().

Referenced by CreateClContextDirect(), and ClContextSerializer::Serialize().

59  {
60  ClContextBuilder builder_(_fbb);
61  builder_.add_programs(programs);
62  return builder_.Finish();
63 }

◆ CreateClContextDirect()

flatbuffers::Offset<ClContext> armnn::CreateClContextDirect ( flatbuffers::FlatBufferBuilder &  _fbb,
const std::vector< flatbuffers::Offset< armnn::Program >> *  programs = nullptr 
)
inline

Definition at line 65 of file ClContextSchema_generated.h.

References CreateClContext().

67  {
68  auto programs__ = programs ? _fbb.CreateVector<flatbuffers::Offset<armnn::Program>>(*programs) : 0;
69  return armnn::CreateClContext(
70  _fbb,
71  programs__);
72 }
flatbuffers::Offset< ClContext > CreateClContext(flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>> programs=0)

◆ CreateDescriptorForConcatenation()

OriginsDescriptor armnn::CreateDescriptorForConcatenation ( TensorShapeIt  first,
TensorShapeIt  last,
unsigned int  concatenationDimension 
)

Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors.

Definition at line 261 of file Descriptors.hpp.

References OriginsDescriptor::SetConcatAxis(), and OriginsDescriptor::SetViewOriginCoord().

Referenced by ConcatDifferentInputOutputQParamTest(), CreateDescriptorForConcat(), and TEST_SUITE().

264 {
265  auto numInputs = std::distance(first, last);
266 
267  if (numInputs < 2)
268  {
269  throw InvalidArgumentException("Concatenation requires at least 2 inputs");
270  }
271 
272  const auto& firstInputShape = *first;
273 
274  const unsigned int numDimensions = firstInputShape.GetNumDimensions();
275  for (auto it = first + 1; it != last; ++it)
276  {
277  if (it->GetNumDimensions() != numDimensions)
278  {
279  throw InvalidArgumentException("All inputs to concatenation must have the same number of dimensions");
280  }
281  }
282 
283  if (concatenationDimension >= numDimensions)
284  {
285  throw InvalidArgumentException("concatenationDimension must be between 0 and the number of dimensions.");
286  }
287 
288  for (auto it = first; it != last; ++it)
289  {
290  for (unsigned int d = 0; d < numDimensions; ++d)
291  {
292  const bool dimSizeOk = (d == concatenationDimension) || (firstInputShape[d] == (*it)[d]);
293  if (!dimSizeOk)
294  {
295  throw InvalidArgumentException("All inputs to concatenation must be the same size along all dimensions "
296  " except the concatenation dimension");
297  }
298  }
299  }
300 
301  OriginsDescriptor viewsDescriptor(static_cast<uint32_t>(numInputs), numDimensions);
302  viewsDescriptor.SetConcatAxis(concatenationDimension);
303 
304  uint32_t viewIndex = 0u;
305  uint32_t coordAlongConcatDim = 0u;
306  for (auto it = first; it != last; ++it)
307  {
308  const auto& inputShape = *it;
309 
310  for (unsigned int i = 0; i < concatenationDimension; ++i)
311  {
312  viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
313  }
314 
315  viewsDescriptor.SetViewOriginCoord(viewIndex, concatenationDimension, coordAlongConcatDim);
316  unsigned int dimSize = inputShape[concatenationDimension];
317  coordAlongConcatDim += dimSize;
318 
319 
320  for (unsigned int i = concatenationDimension + 1; i < numDimensions; ++i)
321  {
322  viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
323  }
324 
325  ++viewIndex;
326  }
327 
328  return viewsDescriptor;
329 }
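
A minimal usage sketch (not from the ArmNN sources): concatenating two NCHW tensors along the channel dimension (axis 1):

 std::vector<armnn::TensorShape> inputShapes = { armnn::TensorShape({ 1, 8, 16, 16 }),
                                                 armnn::TensorShape({ 1, 4, 16, 16 }) };
 armnn::OriginsDescriptor concatDesc =
     armnn::CreateDescriptorForConcatenation(inputShapes.begin(), inputShapes.end(), 1);
 // Two views are created; their origins along axis 1 are 0 and 8, so the second
 // input is placed directly after the first in the 12-channel output.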

◆ CreateProgram()

flatbuffers::Offset<Program> armnn::CreateProgram ( flatbuffers::FlatBufferBuilder &  _fbb,
flatbuffers::Offset< flatbuffers::String >  name = 0,
flatbuffers::Offset< flatbuffers::Vector< uint8_t >>  binary = 0 
)
inline

Definition at line 118 of file ClContextSchema_generated.h.

References ProgramBuilder::add_binary(), ProgramBuilder::add_name(), and ProgramBuilder::Finish().

Referenced by CreateProgramDirect(), and ClContextSerializer::Serialize().

121  {
122  ProgramBuilder builder_(_fbb);
123  builder_.add_binary(binary);
124  builder_.add_name(name);
125  return builder_.Finish();
126 }

◆ CreateProgramDirect()

flatbuffers::Offset<Program> armnn::CreateProgramDirect ( flatbuffers::FlatBufferBuilder &  _fbb,
const char *  name = nullptr,
const std::vector< uint8_t > *  binary = nullptr 
)
inline

Definition at line 128 of file ClContextSchema_generated.h.

References CreateProgram().

131  {
132  auto name__ = name ? _fbb.CreateString(name) : 0;
133  auto binary__ = binary ? _fbb.CreateVector<uint8_t>(*binary) : 0;
134  return armnn::CreateProgram(
135  _fbb,
136  name__,
137  binary__);
138 }
flatbuffers::Offset< Program > CreateProgram(flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::String > name=0, flatbuffers::Offset< flatbuffers::Vector< uint8_t >> binary=0)

◆ CreateSupportedBackends()

BackendsMap CreateSupportedBackends ( TensorHandleFactoryRegistry handleFactoryRegistry,
BackendSettings backendSettings 
)

Definition at line 1120 of file Network.cpp.

References ARMNN_ASSERT, BackendRegistryInstance(), and BackendSettings::m_SupportedBackends.

Referenced by Optimize().

1122 {
1123  BackendsMap backends;
1124  auto const& backendRegistry = BackendRegistryInstance();
1125  for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
1126  {
1127  auto backendFactory = backendRegistry.GetFactory(selectedBackend);
1128  auto backendObjPtr = backendFactory();
1129  ARMNN_ASSERT(backendObjPtr);
1130 
1131  backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);
1132 
1133  backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
1134  }
1135 
1136  return backends;
1137 }
BackendRegistry & BackendRegistryInstance()
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::map< BackendId, std::unique_ptr< class IBackendInternal > > BackendsMap
Definition: Network.hpp:294

◆ Debug()

void Debug ( const TensorInfo inputInfo,
const T *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Definition at line 19 of file Debug.cpp.

References Debug< BFloat16 >(), Debug< float >(), Debug< Half >(), Debug< int16_t >(), Debug< int32_t >(), Debug< int8_t >(), Debug< uint8_t >(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), and TensorInfo::GetShape().

Referenced by RefDebugWorkload< DataType >::ExecuteAsync().

24 {
25  const unsigned int numDims = inputInfo.GetNumDimensions();
26  const unsigned int numElements = inputInfo.GetNumElements();
27  const TensorShape& inputShape = inputInfo.GetShape();
28 
29  std::vector<unsigned int> strides(numDims, 0);
30  strides[numDims - 1] = inputShape[numDims - 1];
31 
32  for (unsigned int i = 2; i <= numDims; i++)
33  {
34  strides[numDims - i] = strides[numDims - i + 1] * inputShape[numDims - i];
35  }
36 
37  std::cout << "{ ";
38  std::cout << "\"layerGuid\": " << guid << ", ";
39  std::cout << "\"layerName\": \"" << layerName << "\", ";
40  std::cout << "\"outputSlot\": " << slotIndex << ", ";
41  std::cout << "\"shape\": ";
42 
43  std::cout << "[";
44  for (unsigned int i = 0; i < numDims; i++)
45  {
46  std::cout << inputShape[i];
47  if (i != numDims - 1)
48  {
49  std::cout << ", ";
50  }
51  }
52  std::cout << "], ";
53 
54  std::cout << "\"min\": "
55  << static_cast<float>(*std::min_element(inputData, inputData + numElements)) << ", ";
56 
57  std::cout << "\"max\": "
58  << static_cast<float>(*std::max_element(inputData, inputData + numElements)) << ", ";
59 
60  std::cout << "\"data\": ";
61 
62  for (unsigned int i = 0; i < numElements; i++)
63  {
64  for (unsigned int j = 0; j < numDims; j++)
65  {
66  if (i % strides[j] == 0)
67  {
68  std::cout << "[" ;
69  }
70  }
71 
72  std::cout << static_cast<float>(inputData[i]);
73 
74  for (unsigned int j = 0; j < numDims; j++)
75  {
76  if ((i+1) % strides[j] == 0)
77  {
78  std::cout << "]" ;
79  }
80  }
81 
82  if (i != numElements - 1)
83  {
84  std::cout << ", ";
85  }
86  }
87 
88  std::cout << " }" << std::endl;
89 }

◆ Debug< BFloat16 >()

template void armnn::Debug< BFloat16 > ( const TensorInfo inputInfo,
const BFloat16 inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< float >()

template void armnn::Debug< float > ( const TensorInfo inputInfo,
const float *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< Half >()

template void armnn::Debug< Half > ( const TensorInfo inputInfo,
const Half inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< int16_t >()

template void armnn::Debug< int16_t > ( const TensorInfo inputInfo,
const int16_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< int32_t >()

template void armnn::Debug< int32_t > ( const TensorInfo inputInfo,
const int32_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< int8_t >()

template void armnn::Debug< int8_t > ( const TensorInfo inputInfo,
const int8_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< uint8_t >()

template void armnn::Debug< uint8_t > ( const TensorInfo inputInfo,
const uint8_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ DepthToSpace()

void DepthToSpace ( const TensorInfo inputInfo,
const DepthToSpaceDescriptor descriptor,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 18 of file DepthToSpace.cpp.

References ARMNN_ASSERT, DepthToSpace(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumElements(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, NCHW, and armnnUtils::Permute().

Referenced by DepthToSpace(), and TEST_SUITE().

23 {
24  const unsigned int blockSize = descriptor.m_BlockSize;
25  ARMNN_ASSERT(blockSize != 0u);
26 
27  const TensorShape& inputShape = inputInfo.GetShape();
28  const unsigned int batches = inputShape[0];
29 
30  armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
31  const unsigned int inDepth = inputShape[dataLayoutIndexed.GetChannelsIndex()];
32  const unsigned int inHeight = inputShape[dataLayoutIndexed.GetHeightIndex()];
33  const unsigned int inWidth = inputShape[dataLayoutIndexed.GetWidthIndex()];
34 
35  const unsigned int outDepth = inDepth / (blockSize * blockSize);
36 
37  // The 4D input data can be interpreted as 6D (implicitly reshaped) as follows:
38  //
39  // [batch, block size, block size, inDepth, inHeight, inWidth] for NCHW and
40  // [batch, inHeight, inWidth, blockSize, blockSize, outDepth] for NHWC.
41  //
42  // DepthToSpace can then be implemented as a permutation in 6D resulting in
43  // the following shapes:
44  //
45  // [batch, outDepth, inHeight, blockSize, inWidth, blockSize] for NCHW and
46  // [batch, inHeight, blockSize, inWidth, blockSize, outDepth] for NHWC.
47  //
48  // NOTE:
49  // Since 6D tensors are not currently supported, in practice we need to handle each
50  // batch separately and execute 5D permutations
51 
52  TensorShape permDestShape;
53  PermutationVector permVector{};
54  if (descriptor.m_DataLayout == DataLayout::NCHW)
55  {
56  permDestShape = TensorShape({ outDepth, inHeight, blockSize, inWidth, blockSize });
57  permVector = { 2, 4, 0, 1, 3 };
58  }
59  else
60  {
61  permDestShape = TensorShape({ inHeight, blockSize, inWidth, blockSize, outDepth });
62  permVector = { 0, 2, 1, 3, 4 };
63  }
64 
65  const unsigned int numElementsPerBatch = inputShape.GetNumElements() / batches;
66 
67  for (unsigned int batchIndex = 0u; batchIndex < batches; ++batchIndex)
68  {
69  const uintptr_t batchDataOffset = batchIndex * (numElementsPerBatch * dataTypeSize);
70 
71  armnnUtils::Permute(permDestShape,
72  permVector,
73  static_cast<const void*>(reinterpret_cast<const uint8_t*>(inputData) + batchDataOffset),
74  static_cast<void*>(reinterpret_cast<uint8_t*>(outputData) + batchDataOffset),
75  dataTypeSize);
76  }
77 }
unsigned int GetNumElements() const
Function that calculates the number of tensor elements by multiplying all dimension sizes which are specified...
Definition: Tensor.cpp:181
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
void Permute(const armnn::TensorShape &dstShape, const armnn::PermutationVector &mappings, const void *src, void *dst, size_t dataTypeSize)
Definition: Permute.cpp:131
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int m_BlockSize
Scalar specifying the input block size. It must be >= 1.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
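
A usage sketch (not part of the generated reference): DepthToSpace is a reference-backend workload function, so the include paths, shapes and values below are illustrative assumptions only.

    // Rearrange a 1x1x1x4 NHWC tensor (blockSize 2) into a 1x2x2x1 tensor.
    armnn::TensorInfo inputInfo({ 1, 1, 1, 4 }, armnn::DataType::Float32);

    armnn::DepthToSpaceDescriptor descriptor;
    descriptor.m_BlockSize  = 2;
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    std::vector<float> input  = { 1.0f, 2.0f, 3.0f, 4.0f };
    std::vector<float> output(4, 0.0f);

    // dataTypeSize is the size in bytes of one element (float here).
    armnn::DepthToSpace(inputInfo, descriptor, input.data(), output.data(), sizeof(float));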

◆ Dequantize() [1/4]

void Dequantize ( Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo 
)

Definition at line 13 of file Dequantize.cpp.

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumElements(), IgnoreUnused(), and Encoder< IType >::Set().

17 {
18  IgnoreUnused(outputInfo);
19  ARMNN_ASSERT(inputInfo.GetNumElements() == outputInfo.GetNumElements());
20  for (unsigned int i = 0; i < inputInfo.GetNumElements(); i++)
21  {
22  // inputDecoder.Get() dequantizes the data element from whatever
23  // type is given by inputInfo to fp32 (If MakeDecoder supports that dequantization)
24  // outputEncoder.Set() transforms the data element to whatever type is
25  // given by outputInfo (if MakeEncoder supports that transformation)
26  outputEncoder.Set(inputDecoder.Get());
27  ++outputEncoder;
28  ++inputDecoder;
29  }
30 }
virtual void Set(IType right)=0
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ Dequantize() [2/4]

std::vector<float> armnn::Dequantize ( const T *  quant,
const TensorInfo &  info 
)

u8 helpers

Definition at line 95 of file RefWorkloadUtils.hpp.

References Dequantize(), TensorInfo::GetNumElements(), TensorInfo::GetQuantizationOffset(), and TensorInfo::GetQuantizationScale().

96 {
97  std::vector<float> ret(info.GetNumElements());
98  for (size_t i = 0; i < info.GetNumElements(); i++)
99  {
100  ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
101  }
102  return ret;
103 }
float Dequantize(QuantizedType value, float scale, int32_t offset)
Dequantize an 8-bit data type into a floating point data type.
Definition: TypesUtils.cpp:46

◆ Dequantize() [3/4]

void armnn::Dequantize ( const T *  inputData,
float *  outputData,
const TensorInfo &  info 
)
inline

Definition at line 106 of file RefWorkloadUtils.hpp.

References TensorInfo::GetNumElements(), TensorInfo::GetQuantizationOffset(), and TensorInfo::GetQuantizationScale().

107 {
108  for (unsigned int i = 0; i < info.GetNumElements(); i++)
109  {
110  outputData[i] = Dequantize<T>(inputData[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
111  }
112 }

◆ Dequantize() [4/4]

float Dequantize ( QuantizedType  value,
float  scale,
int32_t  offset 
)

Dequantize an 8-bit data type into a floating point data type.

Parameters
value- The value to dequantize.
scale- The scale (must be non-zero).
offset- The offset.
Returns
- The dequantized value calculated as (value-offset)*scale.

Definition at line 46 of file TypesUtils.cpp.

References ARMNN_ASSERT.

Referenced by SelectiveQuantizer< T, DoQuantize >::Dequantize(), Dequantize(), TensorPrinter::operator()(), and TEST_SUITE().

47 {
48  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
49  ARMNN_ASSERT(scale != 0.f);
50  ARMNN_ASSERT(!IsNan(value));
51  return (armnn::numeric_cast<float>(value - offset)) * scale;
52 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
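
A minimal worked example of the formula above (the scale and offset values are assumed for illustration):

    // (value - offset) * scale: with scale 0.5 and offset 10,
    // the quantized value 14 dequantizes to (14 - 10) * 0.5 = 2.0f.
    uint8_t quantized = 14;
    float real = armnn::Dequantize(quantized, 0.5f, 10);   // 2.0f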

◆ DetectionPostProcess()

void DetectionPostProcess ( const TensorInfo &  boxEncodingsInfo,
const TensorInfo &  scoresInfo,
const TensorInfo &  anchorsInfo,
const TensorInfo &  detectionBoxesInfo,
const TensorInfo &  detectionClassesInfo,
const TensorInfo &  detectionScoresInfo,
const TensorInfo &  numDetectionsInfo,
const DetectionPostProcessDescriptor &  desc,
Decoder< float > &  boxEncodings,
Decoder< float > &  scores,
Decoder< float > &  anchors,
float *  detectionBoxes,
float *  detectionClasses,
float *  detectionScores,
float *  numDetections 
)

Definition at line 140 of file DetectionPostProcess.cpp.

References AllocateOutputData(), ARMNN_ASSERT, GenerateRangeK(), Decoder< IType >::Get(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IgnoreUnused(), DetectionPostProcessDescriptor::m_DetectionsPerClass, DetectionPostProcessDescriptor::m_MaxClassesPerDetection, DetectionPostProcessDescriptor::m_MaxDetections, DetectionPostProcessDescriptor::m_NmsIouThreshold, DetectionPostProcessDescriptor::m_NmsScoreThreshold, DetectionPostProcessDescriptor::m_NumClasses, DetectionPostProcessDescriptor::m_ScaleH, DetectionPostProcessDescriptor::m_ScaleW, DetectionPostProcessDescriptor::m_ScaleX, DetectionPostProcessDescriptor::m_ScaleY, DetectionPostProcessDescriptor::m_UseRegularNms, NonMaxSuppression(), numeric_cast(), and TopKSort().

Referenced by TEST_SUITE().

155 {
156  IgnoreUnused(anchorsInfo, detectionClassesInfo, detectionScoresInfo, numDetectionsInfo);
157 
158  // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format,
159  // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax)
160  std::vector<float> boxCorners(boxEncodingsInfo.GetNumElements());
161 
162  const unsigned int numBoxes = boxEncodingsInfo.GetShape()[1];
163  const unsigned int numScores = scoresInfo.GetNumElements();
164 
165  for (unsigned int i = 0; i < numBoxes; ++i)
166  {
167  // Y
168  float boxEncodingY = boxEncodings.Get();
169  float anchorY = anchors.Get();
170 
171  ++boxEncodings;
172  ++anchors;
173 
174  // X
175  float boxEncodingX = boxEncodings.Get();
176  float anchorX = anchors.Get();
177 
178  ++boxEncodings;
179  ++anchors;
180 
181  // H
182  float boxEncodingH = boxEncodings.Get();
183  float anchorH = anchors.Get();
184 
185  ++boxEncodings;
186  ++anchors;
187 
188  // W
189  float boxEncodingW = boxEncodings.Get();
190  float anchorW = anchors.Get();
191 
192  ++boxEncodings;
193  ++anchors;
194 
195  float yCentre = boxEncodingY / desc.m_ScaleY * anchorH + anchorY;
196  float xCentre = boxEncodingX / desc.m_ScaleX * anchorW + anchorX;
197 
198  float halfH = 0.5f * expf(boxEncodingH / desc.m_ScaleH) * anchorH;
199  float halfW = 0.5f * expf(boxEncodingW / desc.m_ScaleW) * anchorW;
200 
201  unsigned int indexY = i * 4;
202  unsigned int indexX = indexY + 1;
203  unsigned int indexH = indexX + 1;
204  unsigned int indexW = indexH + 1;
205 
206  // ymin
207  boxCorners[indexY] = yCentre - halfH;
208  // xmin
209  boxCorners[indexX] = xCentre - halfW;
210  // ymax
211  boxCorners[indexH] = yCentre + halfH;
212  // xmax
213  boxCorners[indexW] = xCentre + halfW;
214 
215  ARMNN_ASSERT(boxCorners[indexY] < boxCorners[indexH]);
216  ARMNN_ASSERT(boxCorners[indexX] < boxCorners[indexW]);
217  }
218 
219  unsigned int numClassesWithBg = desc.m_NumClasses + 1;
220 
221  // Decode scores
222  std::vector<float> decodedScores;
223  decodedScores.reserve(numScores);
224 
225  for (unsigned int i = 0u; i < numScores; ++i)
226  {
227  decodedScores.emplace_back(scores.Get());
228  ++scores;
229  }
230 
231  // Perform Non Max Suppression.
232  if (desc.m_UseRegularNms)
233  {
234  // Perform Regular NMS.
235  // For each class, perform NMS and select max detection numbers of the highest score across all classes.
236  std::vector<float> classScores(numBoxes);
237 
238  std::vector<unsigned int> selectedBoxesAfterNms;
239  selectedBoxesAfterNms.reserve(numBoxes);
240 
241  std::vector<float> selectedScoresAfterNms;
242  selectedBoxesAfterNms.reserve(numScores);
243 
244  std::vector<unsigned int> selectedClasses;
245 
246  for (unsigned int c = 0; c < desc.m_NumClasses; ++c)
247  {
248  // For each box, get its score for class c.
249  for (unsigned int i = 0; i < numBoxes; ++i)
250  {
251  classScores[i] = decodedScores[i * numClassesWithBg + c + 1];
252  }
253  std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes,
254  boxCorners,
255  classScores,
256  desc.m_NmsScoreThreshold,
257  desc.m_DetectionsPerClass,
258  desc.m_NmsIouThreshold);
259 
260  for (unsigned int i = 0; i < selectedIndices.size(); ++i)
261  {
262  selectedBoxesAfterNms.push_back(selectedIndices[i]);
263  selectedScoresAfterNms.push_back(classScores[selectedIndices[i]]);
264  selectedClasses.push_back(c);
265  }
266  }
267 
268  // Select max detection numbers of the highest score across all classes
269  unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedBoxesAfterNms.size());
270  unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
271 
272  // Sort the max scores among the selected indices.
273  std::vector<unsigned int> outputIndices = GenerateRangeK(numSelected);
274  TopKSort(numOutput, outputIndices.data(), selectedScoresAfterNms.data(), numSelected);
275 
276  AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, outputIndices,
277  selectedBoxesAfterNms, selectedClasses, selectedScoresAfterNms,
278  detectionBoxes, detectionScores, detectionClasses, numDetections);
279  }
280  else
281  {
282  // Perform Fast NMS.
283  // Select max scores of boxes and perform NMS on max scores,
284  // select max detection numbers of the highest score
285  unsigned int numClassesPerBox = std::min(desc.m_MaxClassesPerDetection, desc.m_NumClasses);
286  std::vector<float> maxScores;
287  std::vector<unsigned int>boxIndices;
288  std::vector<unsigned int>maxScoreClasses;
289 
290  for (unsigned int box = 0; box < numBoxes; ++box)
291  {
292  unsigned int scoreIndex = box * numClassesWithBg + 1;
293 
294  // Get the max scores of the box.
295  std::vector<unsigned int> maxScoreIndices = GenerateRangeK(desc.m_NumClasses);
296  TopKSort(numClassesPerBox, maxScoreIndices.data(),
297  decodedScores.data() + scoreIndex, desc.m_NumClasses);
298 
299  for (unsigned int i = 0; i < numClassesPerBox; ++i)
300  {
301  maxScores.push_back(decodedScores[scoreIndex + maxScoreIndices[i]]);
302  maxScoreClasses.push_back(maxScoreIndices[i]);
303  boxIndices.push_back(box);
304  }
305  }
306 
307  // Perform NMS on max scores
308  std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, maxScores,
309  desc.m_NmsScoreThreshold,
310  desc.m_MaxDetections,
311  desc.m_NmsIouThreshold);
312 
313  unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedIndices.size());
314  unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
315 
316  AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, selectedIndices,
317  boxIndices, maxScoreClasses, maxScores,
318  detectionBoxes, detectionScores, detectionClasses, numDetections);
319  }
320 }
std::vector< unsigned int > GenerateRangeK(unsigned int k)
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
void TopKSort(unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const std::vector< float > &boxCorners, const std::vector< unsigned int > &outputIndices, const std::vector< unsigned int > &selectedBoxes, const std::vector< unsigned int > &selectedClasses, const std::vector< float > &selectedScores, float *detectionBoxes, float *detectionScores, float *detectionClasses, float *numDetections)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
std::vector< unsigned int > NonMaxSuppression(unsigned int numBoxes, const std::vector< float > &boxCorners, const std::vector< float > &scores, float nmsScoreThreshold, unsigned int maxDetection, float nmsIouThreshold)
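
As a compact summary of the box decoding performed above (symbols follow the listing; this is a restatement for readability, not additional behaviour):

    // Per box, with the matching anchor (ycenter, xcenter, height, width):
    //   yCentre = boxEncodingY / m_ScaleY * anchorH + anchorY
    //   xCentre = boxEncodingX / m_ScaleX * anchorW + anchorX
    //   halfH   = 0.5 * exp(boxEncodingH / m_ScaleH) * anchorH
    //   halfW   = 0.5 * exp(boxEncodingW / m_ScaleW) * anchorW
    // corners: (ymin, xmin, ymax, xmax) =
    //   (yCentre - halfH, xCentre - halfW, yCentre + halfH, xCentre + halfW)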

◆ ExtractJsonObjects()

void armnn::ExtractJsonObjects ( unsigned int  inferenceIndex,
const Event *  parentEvent,
JsonChildObject &  parentObject,
std::map< const Event *, std::vector< const Event *>>  descendantsMap 
)

Definition at line 303 of file Profiling.cpp.

References JsonChildObject::AddChild(), JsonChildObject::AddMeasurement(), ARMNN_ASSERT, Event, JsonChildObject::GetChild(), Event::GetMeasurements(), Event::GetProfilingGuid(), OptionalBase::has_value(), Measurement, JsonChildObject::NumChildren(), JsonChildObject::SetGuid(), JsonChildObject::SetType(), JsonChildObject::SetUnit(), and OptionalReferenceSwitch< IsReference, T >::value().

Referenced by ProfilerImpl::Print().

307 {
308  ARMNN_ASSERT(parentEvent);
309 
310  // If profiling GUID is entered, process it
311  if (parentEvent->GetProfilingGuid().has_value())
312  {
313  arm::pipe::ProfilingGuid profilingGuid;
314  profilingGuid = parentEvent->GetProfilingGuid().value();
315  parentObject.SetGuid(profilingGuid);
316  }
317  std::vector<Measurement> instrumentMeasurements = parentEvent->GetMeasurements();
318  unsigned int childIdx = 0;
319  for (size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
320  {
321  if (inferenceIndex == 0)
322  {
323  // Only add kernel measurement once, in case of multiple inferences
324  JsonChildObject measurementObject{ instrumentMeasurements[measurementIndex].m_Name };
325  measurementObject.SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
326  measurementObject.SetType(JsonObjectType::Measurement);
327 
328  ARMNN_ASSERT(parentObject.NumChildren() == childIdx);
329  parentObject.AddChild(measurementObject);
330  }
331 
332  parentObject.GetChild(childIdx).AddMeasurement(instrumentMeasurements[measurementIndex].m_Value);
333  }
334 
335  auto childEventsIt = descendantsMap.find(parentEvent);
336  if (childEventsIt != descendantsMap.end())
337  {
338  for (auto childEvent : childEventsIt->second)
339  {
340  if (inferenceIndex == 0)
341  {
342  // Only add second level once, in case of multiple inferences
343  JsonChildObject childObject{ childEvent->GetName() };
344  childObject.SetType(JsonObjectType::Event);
345  parentObject.AddChild(childObject);
346  }
347 
348  // It's possible that childIdx can overrun the parent's child vector. Check before we try to process a
349  // non-existent child.
350  if (childIdx < parentObject.NumChildren())
351  {
352  // Recursively process children.
353  ExtractJsonObjects(inferenceIndex, childEvent, parentObject.GetChild(childIdx), descendantsMap);
354  childIdx++;
355  }
356  }
357  }
358 }
void ExtractJsonObjects(unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event *>> descendantsMap)
Definition: Profiling.cpp:303
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ FakeQuantization()

void armnn::FakeQuantization ( const float *  inputData,
float *  outputData,
uint32_t  numElements,
float  min,
float  max 
)

Definition at line 17 of file RefFakeQuantizationFloat32Workload.cpp.

References numeric_cast().

Referenced by TEST_SUITE().

18 {
19  float scale = (max - min) / 255.f;
20  int32_t offset = armnn::numeric_cast<int32_t>((-min * 255.f) / (max - min));
21 
22  for (uint32_t i = 0; i < numElements; i++)
23  {
24  outputData[i] = static_cast<float>(armnn::Quantize<uint8_t>(inputData[i], scale, offset));
25  }
26 
27 }
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
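
A worked example of the scale and offset derivation above (the min/max range is assumed for illustration):

    float min = -1.0f, max = 1.0f;
    float   scale  = (max - min) / 255.f;                                 // ~0.00784
    int32_t offset = static_cast<int32_t>((-min * 255.f) / (max - min));  // 127
    // An input of 0.0f quantizes to level 127 and is written back as 127.0f,
    // i.e. the output holds the quantized levels re-expressed as floats.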

◆ FalseFunc()

bool armnn::FalseFunc ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 62 of file LayerSupportCommon.hpp.

References IgnoreUnused().

63 {
64  IgnoreUnused(reasonIfUnsupported);
65  IgnoreUnused(params...);
66  return false;
67 }
void IgnoreUnused(Ts &&...)

◆ FalseFuncF16()

bool armnn::FalseFuncF16 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 70 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

71 {
72  IgnoreUnused(params...);
73  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type");
74  return false;
75 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseFuncF32()

bool armnn::FalseFuncF32 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 78 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

79 {
80  IgnoreUnused(params...);
81  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type");
82  return false;
83 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseFuncI32()

bool armnn::FalseFuncI32 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 94 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

95 {
96  IgnoreUnused(params...);
97  SetValueChecked(reasonIfUnsupported, "Layer is not supported with int32 data type");
98  return false;
99 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseFuncU8()

bool armnn::FalseFuncU8 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 86 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

87 {
88  IgnoreUnused(params...);
89  SetValueChecked(reasonIfUnsupported, "Layer is not supported with 8-bit data type");
90  return false;
91 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseInputFuncF16()

bool armnn::FalseInputFuncF16 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 110 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

111 {
112  IgnoreUnused(params...);
113  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type input");
114  return false;
115 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseInputFuncF32()

bool armnn::FalseInputFuncF32 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 102 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

103 {
104  IgnoreUnused(params...);
105  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type input");
106  return false;
107 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseOutputFuncF16()

bool armnn::FalseOutputFuncF16 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 126 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

127 {
128  IgnoreUnused(params...);
129  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type output");
130  return false;
131 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseOutputFuncF32()

bool armnn::FalseOutputFuncF32 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 118 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

119 {
120  IgnoreUnused(params...);
121  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type output");
122  return false;
123 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ Fill()

void Fill ( Encoder< float > &  output,
const TensorShape &  desiredOutputShape,
const float  value 
)

Creates a tensor and fills it with a scalar value.

Definition at line 13 of file Fill.cpp.

References TensorShape::GetNumElements(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

16 {
17  for(unsigned int i = 0; i < desiredOutputShape.GetNumElements(); ++i)
18  {
19  output[i];
20  output.Set(value);
21  }
22 }
virtual void Set(IType right)=0

◆ FindKernelMeasurements()

std::vector<Measurement> armnn::FindKernelMeasurements ( const Event *  event)

Definition at line 62 of file Profiling.cpp.

References ARMNN_ASSERT, and Event::GetMeasurements().

63 {
64  ARMNN_ASSERT(event != nullptr);
65 
66  std::vector<Measurement> measurements;
67 
68  // Search through the measurements.
69  for (const auto& measurement : event->GetMeasurements())
70  {
71  if (measurement.m_Name.rfind("OpenClKernelTimer", 0) == 0
72  || measurement.m_Name.rfind("NeonKernelTimer", 0) == 0)
73  {
74  // Measurement found.
75  measurements.push_back(measurement);
76  }
77  }
78 
79  return measurements;
80 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ FindMeasurement()

Measurement armnn::FindMeasurement ( const std::string &  name,
const Event *  event 
)

Definition at line 43 of file Profiling.cpp.

References ARMNN_ASSERT, and Event::GetMeasurements().

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults(), and ProfilerImpl::CalculateProfilingEventStats().

44 {
45 
46  ARMNN_ASSERT(event != nullptr);
47 
48  // Search through the measurements.
49  for (const auto& measurement : event->GetMeasurements())
50  {
51  if (measurement.m_Name == name)
52  {
53  // Measurement found.
54  return measurement;
55  }
56  }
57 
58  // Measurement not found.
59  return Measurement{ "", 0.f, Measurement::Unit::TIME_MS };
60 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ FinishClContextBuffer()

void armnn::FinishClContextBuffer ( flatbuffers::FlatBufferBuilder &  fbb,
flatbuffers::Offset< armnn::ClContext >  root 
)
inline

Definition at line 171 of file ClContextSchema_generated.h.

References ClContextIdentifier().

173  {
174  fbb.Finish(root, ClContextIdentifier());
175 }
const char * ClContextIdentifier()

◆ FinishSizePrefixedClContextBuffer()

void armnn::FinishSizePrefixedClContextBuffer ( flatbuffers::FlatBufferBuilder &  fbb,
flatbuffers::Offset< armnn::ClContext >  root 
)
inline

Definition at line 177 of file ClContextSchema_generated.h.

References ClContextIdentifier().

179  {
180  fbb.FinishSizePrefixed(root, ClContextIdentifier());
181 }
const char * ClContextIdentifier()

◆ ForEachLayerInput()

void armnn::ForEachLayerInput ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo,
Delegate  function 
)

Definition at line 267 of file SubgraphViewSelector.cpp.

References ARMNN_ASSERT_MSG, and Layer::GetInputSlots().

Referenced by AssignSplitId(), and IsReadyForSplitAssignment().

270 {
271  Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
272 
273  for (auto inputSlot : layer.GetInputSlots())
274  {
275  auto connectedInput = PolymorphicDowncast<OutputSlot*>(inputSlot.GetConnection());
276  ARMNN_ASSERT_MSG(connectedInput, "Dangling input slot detected.");
277  Layer& inputLayer = connectedInput->GetOwningLayer();
278 
279  auto parentInfo = layerInfos.find(&inputLayer);
280  if (parentInfo != layerInfos.end())
281  {
282  function(parentInfo->second);
283  }
284  }
285 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ ForEachLayerOutput()

void armnn::ForEachLayerOutput ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo,
Delegate  function 
)

Definition at line 288 of file SubgraphViewSelector.cpp.

References Layer::GetOutputSlots().

Referenced by SubgraphViewSelector::SelectSubgraphs().

291 {
292  Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
293 
294  for (auto& outputSlot : layer.GetOutputSlots())
295  {
296  for (auto& output : outputSlot.GetConnections())
297  {
298  Layer& childLayer = output->GetOwningLayer();
299 
300  auto childInfo = layerInfos.find(&childLayer);
301  if (childInfo != layerInfos.end())
302  {
303  function(childInfo->second);
304  }
305  }
306  }
307 }

◆ FullyConnected()

void FullyConnected ( const TensorShape &  rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape &  rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape &  rWeightsShape,
Decoder< float > &  rWeightDecoder,
Decoder< float > *  pBiasDecoder,
const bool  biasEnabled,
const unsigned int  K,
const bool  transposeWeights 
)

Performs a matrix multiplication and optionally adds a bias.

Definition at line 15 of file FullyConnected.cpp.

References ARMNN_ASSERT, Decoder< IType >::DecodeTensor(), and Encoder< IType >::Set().

25 {
26  // Perform FullyConnected implementation
27  unsigned int outputSize = rOutputShape[1];
28 
29  const std::vector<float> decodedInputs = rInputDecoder.DecodeTensor(rInputShape);
30  const std::vector<float> decodedWeights = rWeightDecoder.DecodeTensor(rWeightsShape);
31 
32  const TensorShape biasShape{outputSize};
33 
34  ARMNN_ASSERT(!biasEnabled || pBiasDecoder != nullptr);
35  const std::vector<float> decodedBiases = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
36 
37 
38  for (unsigned int n = 0; n < rInputShape[0]; n++)
39  {
40  for (unsigned int channelOutput = 0; channelOutput < outputSize; channelOutput++)
41  {
42  float outval = 0.f;
43 
44  for (unsigned int channelInput = 0; channelInput < K; channelInput++)
45  {
46  float weight;
47  if (transposeWeights)
48  {
49  weight = decodedWeights[channelOutput * K + channelInput];
50  }
51  else
52  {
53  weight = decodedWeights[channelInput * outputSize + channelOutput];
54  }
55 
56  outval += weight * decodedInputs[n * K + channelInput];
57  }
58 
59  if (biasEnabled)
60  {
61  outval += decodedBiases[channelOutput];
62  }
63 
64  rOutputEncoder[n * outputSize + channelOutput];
65  rOutputEncoder.Set(outval);
66  }
67  }
68 }
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
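
For intuition, the indexing above amounts to the following (the shapes are assumed for illustration):

    // rInputShape   = [1, 3]  (N = 1, K = 3)
    // rOutputShape  = [1, 2]  (outputSize = 2)
    // rWeightsShape = [2, 3]  when transposeWeights == true
    //
    // transposeWeights == true:  weight(o, k) = decodedWeights[o * K + k]
    // transposeWeights == false: weight(o, k) = decodedWeights[k * outputSize + o]
    //
    // output[n][o] = sum_k weight(o, k) * input[n][k]  (+ bias[o] if biasEnabled)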

◆ FuseAdditionLayer()

LayerType* armnn::FuseAdditionLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 116 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

121 {
122  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddAdditionLayer(name.c_str());
123  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
124 
125  FuseLayer(optimizationViews,
126  baseLayer,
127  replacementLayer,
128  activationLayer,
129  activationDesc);
130 
131  return replacementLayer;
132 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseBatchNormalizationLayer()

LayerType* armnn::FuseBatchNormalizationLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 192 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

197 {
198  IConnectableLayer* replacement =
199  optimizationViews.GetINetwork()->AddBatchNormalizationLayer(baseLayer->GetParameters(),
200  ConstTensor(),
201  ConstTensor(),
202  ConstTensor(),
203  ConstTensor(),
204  name.c_str());
205  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
206 
207  FuseLayer(optimizationViews,
208  baseLayer,
209  replacementLayer,
210  activationLayer,
211  activationDesc);
212 
213  SubgraphView substitutionSubgraph({baseLayer, activationLayer},
214  CreateIInputsFrom({baseLayer}),
215  CreateIOutputsFrom({activationLayer}));
216  SubgraphView replacementSubgraph(replacementLayer);
217 
218  return replacementLayer;
219 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseConvolution2dLayer()

LayerType* armnn::FuseConvolution2dLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 222 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

227 {
228  IConnectableLayer* replacement = optimizationViews.GetINetwork()
229  ->AddConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
230 
231  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
232 
233  replacementLayer->m_Weight = std::move(baseLayer->m_Weight);
234  replacementLayer->m_Bias = std::move(baseLayer->m_Bias);
235 
236  FuseLayer(optimizationViews,
237  baseLayer,
238  replacementLayer,
239  activationLayer,
240  activationDesc);
241 
242  return replacementLayer;
243 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseDepthwiseConvolution2dLayer()

LayerType* armnn::FuseDepthwiseConvolution2dLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 246 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

251 {
252  IConnectableLayer* replacement =
253  optimizationViews.GetINetwork()->AddDepthwiseConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
254 
255  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
256 
257  replacementLayer->m_Weight = std::move(baseLayer->m_Weight);
258  replacementLayer->m_Bias = std::move(baseLayer->m_Bias);
259 
260  FuseLayer(optimizationViews,
261  baseLayer,
262  replacementLayer,
263  activationLayer,
264  activationDesc);
265 
266  return replacementLayer;
267 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseDivisionLayer()

LayerType* armnn::FuseDivisionLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 154 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

159 {
160  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddDivisionLayer(name.c_str());
161  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
162 
163  FuseLayer(optimizationViews,
164  baseLayer,
165  replacementLayer,
166  activationLayer,
167  activationDesc);
168 
169  return replacementLayer;
170 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseFullyConnectedLayer()

LayerType* armnn::FuseFullyConnectedLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 270 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

275 {
276  IConnectableLayer* replacement =
277  optimizationViews.GetINetwork()->AddFullyConnectedLayer(baseLayer->GetParameters(),
278  name.c_str());
279  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
280 
281  FuseLayer(optimizationViews,
282  baseLayer,
283  replacementLayer,
284  activationLayer,
285  activationDesc);
286 
287  replacementLayer->m_Weight = std::move(baseLayer->m_Weight);
288  replacementLayer->m_Bias = std::move(baseLayer->m_Bias);
289 
290  return replacementLayer;
291 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseLayer()

LayerType* armnn::FuseLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
LayerType replacementLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc 
)

Definition at line 96 of file ArmComputeSubgraphUtils.hpp.

References OptimizationViews::AddSubstitution().

Referenced by FuseAdditionLayer(), FuseBatchNormalizationLayer(), FuseConvolution2dLayer(), FuseDepthwiseConvolution2dLayer(), FuseDivisionLayer(), FuseFullyConnectedLayer(), FuseMultiplicationLayer(), and FuseSubtractionLayer().

101 {
102  replacementLayer->SetAdditionalInfoForObject(
103  std::make_shared<ActivationDescriptor>(activationDesc));
104 
105  SubgraphView substitutionSubgraph({baseLayer, activationLayer},
106  CreateIInputsFrom({baseLayer}),
107  CreateIOutputsFrom({activationLayer}));
108  SubgraphView replacementSubgraph(replacementLayer);
109 
110  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
111 
112  return replacementLayer;
113 }

◆ FuseMultiplicationLayer()

LayerType* armnn::FuseMultiplicationLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 173 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

178 {
179  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddMultiplicationLayer(name.c_str());
180  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
181 
182  FuseLayer(optimizationViews,
183  baseLayer,
184  replacementLayer,
185  activationLayer,
186  activationDesc);
187 
188  return replacementLayer;
189 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseSubtractionLayer()

LayerType* armnn::FuseSubtractionLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 135 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

140 {
141  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddSubtractionLayer(name.c_str());
142  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
143 
144  FuseLayer(optimizationViews,
145  baseLayer,
146  replacementLayer,
147  activationLayer,
148  activationDesc);
149 
150  return replacementLayer;
151 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ Gather()

void Gather ( const TensorInfo &  paramsInfo,
const TensorInfo &  indicesInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  params,
const int32_t *  indices,
Encoder< float > &  output,
const int32_t  axis 
)

Definition at line 17 of file Gather.cpp.

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IgnoreUnused(), numeric_cast(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

24 {
25  IgnoreUnused(outputInfo);
26  IgnoreUnused(axis);
27 
28  const TensorShape& paramsShape = paramsInfo.GetShape();
29 
30  unsigned int paramsProduct = 1;
31  for (unsigned int i = 1; i < paramsInfo.GetNumDimensions(); ++i)
32  {
33  paramsProduct = paramsProduct * paramsShape[i];
34  }
35 
36  unsigned int outIndex = 0;
37  for (unsigned int i = 0; i < indicesInfo.GetNumElements(); ++i)
38  {
39  unsigned int indx = armnn::numeric_cast<unsigned int>(indices[i]);
40 
41  ARMNN_ASSERT(indices[i] >= 0 && indx < paramsShape[0]);
42 
43  unsigned int startOffset = indx * paramsProduct;
44  unsigned int endOffset = startOffset + paramsProduct;
45 
46  for (unsigned int j = startOffset; j < endOffset; ++j)
47  {
48  params[j];
49  float outputValue = params.Get();
50  output[outIndex];
51  output.Set(outputValue);
52  ++outIndex;
53  }
54  }
55 
56  ARMNN_ASSERT(outIndex == outputInfo.GetNumElements());
57 }
virtual void Set(IType right)=0
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
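
For intuition, the implementation gathers whole slices along dimension 0 of params (data assumed for illustration):

    // params (shape [3, 2]):  [[1, 2], [3, 4], [5, 6]]
    // indices:                [2, 0]
    // output (shape [2, 2]):  [[5, 6], [1, 2]]
    //
    // paramsProduct above is 2, so index i selects the flattened
    // element range [2 * i, 2 * i + 2) of params.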

◆ GatherTensorHandlePairs()

void armnn::GatherTensorHandlePairs ( const DescriptorType &  descriptor,
std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType *>> &  tensorHandlePairs 
)

Definition at line 189 of file WorkloadUtils.hpp.

References CalculateGatherNdKeyIndices(), Convert1HWOTensorInfoToAcl(), Convert1HWOTensorToAcl(), Convert1HWOtoMIHW(), ConvertMaskToACLFormat(), ConvertWeightTensorFromArmnnToAcl(), ConvertWeightTensorInfoFromArmnnToAcl(), PermuteTensor(), and ReshapeWeightsForAcl().

Referenced by CopyMemGenericWorkload::CopyMemGenericWorkload(), CopyMemGenericWorkload::ExecuteAsync(), NeonConvertBf16ToFp32Workload::NeonConvertBf16ToFp32Workload(), NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(), NeonConvertFp32ToBf16Workload::NeonConvertFp32ToBf16Workload(), and NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload().

191 {
192  const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
193  tensorHandlePairs.reserve(numInputs);
194 
195  for (unsigned int i = 0; i < numInputs; ++i)
196  {
197  SrcTensorHandleType* const srcTensorHandle =
198  PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
199  DstTensorHandleType* const dstTensorHandle =
200  PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
201 
202  tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
203  }
204 }

◆ GenerateRangeK()

std::vector<unsigned int> armnn::GenerateRangeK ( unsigned int  k)

Definition at line 17 of file DetectionPostProcess.cpp.

Referenced by DetectionPostProcess(), and NonMaxSuppression().

18 {
19  std::vector<unsigned int> range(k);
20  std::iota(range.begin(), range.end(), 0);
21  return range;
22 }

◆ GetActivationFunctionAsCString()

constexpr char const* armnn::GetActivationFunctionAsCString ( ActivationFunction  activation)

Definition at line 27 of file TypesUtils.hpp.

References Abs, BoundedReLu, Elu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by StringifyLayerParameters< ActivationDescriptor >::Serialize().

28 {
29  switch (activation)
30  {
31  case ActivationFunction::Sigmoid: return "Sigmoid";
32  case ActivationFunction::TanH: return "TanH";
33  case ActivationFunction::Linear: return "Linear";
34  case ActivationFunction::ReLu: return "ReLu";
35  case ActivationFunction::BoundedReLu: return "BoundedReLu";
36  case ActivationFunction::SoftReLu: return "SoftReLu";
37  case ActivationFunction::LeakyReLu: return "LeakyReLu";
38  case ActivationFunction::Abs: return "Abs";
39  case ActivationFunction::Sqrt: return "Sqrt";
40  case ActivationFunction::Square: return "Square";
41  case ActivationFunction::Elu: return "Elu";
42  case ActivationFunction::HardSwish: return "HardSwish";
43  default: return "Unknown";
44  }
45 }

◆ GetArgMinMaxFunctionAsCString()

constexpr char const* armnn::GetArgMinMaxFunctionAsCString ( ArgMinMaxFunction  function)

Definition at line 47 of file TypesUtils.hpp.

References Max, and Min.

48 {
49  switch (function)
50  {
51  case ArgMinMaxFunction::Max: return "Max";
52  case ArgMinMaxFunction::Min: return "Min";
53  default: return "Unknown";
54  }
55 }

◆ GetBiasDataType()

DataType GetBiasDataType ( DataType  inputDataType)

Definition at line 27 of file WorkloadData.cpp.

References ARMNN_ASSERT_MSG, ARMNN_LOG, BFloat16, CHECK_LOCATION, TensorInfo::GetDataType(), GetDataTypeName(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetQuantizationDim(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::GetQuantizationScales(), TensorInfo::GetShape(), OptionalBase::has_value(), TensorInfo::HasMultipleQuantizationScales(), TensorInfo::HasPerAxisQuantization(), info, TensorInfo::IsQuantized(), IsQuantized8BitType(), TensorInfo::IsTypeSpaceMatch(), WorkloadInfo::m_InputTensorInfos, WorkloadInfo::m_OutputTensorInfos, OptionalReferenceSwitch< std::is_reference< T >::value, T >::value(), and warning.

Referenced by CompareDepthwiseConvolution2dTestImpl(), TEST_SUITE(), FullyConnectedQueueDescriptor::Validate(), Convolution2dQueueDescriptor::Validate(), Convolution3dQueueDescriptor::Validate(), DepthwiseConvolution2dQueueDescriptor::Validate(), and TransposeConvolution2dQueueDescriptor::Validate().

28 {
29  switch (inputDataType)
30  {
31  case DataType::Float16:
32  return DataType::Float16;
33  case DataType::BFloat16:
34  case DataType::Float32:
35  return DataType::Float32;
36  case DataType::QAsymmS8:
37  return DataType::Signed32;
38  case DataType::QAsymmU8:
39  return DataType::Signed32;
40  case DataType::QSymmS8:
41  return DataType::Signed32;
42  case DataType::QSymmS16:
43  return DataType::Signed32;
44  default:
45  ARMNN_ASSERT_MSG(false, "Invalid input data type");
46  return DataType::Float32;
47  }
48 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
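
Example of the mapping above: quantized 8-bit and 16-bit inputs take 32-bit signed biases.

    armnn::DataType biasType = armnn::GetBiasDataType(armnn::DataType::QAsymmU8);
    // biasType == armnn::DataType::Signed32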

◆ GetBiasTypeFromWeightsType()

armnn::Optional< armnn::DataType > GetBiasTypeFromWeightsType ( armnn::Optional< armnn::DataType >  weightsType)
inline

Definition at line 14 of file LayerSupportRules.hpp.

References ARMNN_ASSERT_MSG, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by BiasAndWeightsTypesCompatible::BiasAndWeightsTypesCompatible(), BiasAndWeightsTypesMatch::BiasAndWeightsTypesMatch(), and FullyConnectedTest().

15 {
16  if (!weightsType)
17  {
18  return weightsType;
19  }
20 
21  switch(weightsType.value())
22  {
23  case armnn::DataType::Float16:
24  case armnn::DataType::Float32:
25  return weightsType;
26  case armnn::DataType::QAsymmS8:
27  case armnn::DataType::QAsymmU8:
28  case armnn::DataType::QSymmS8:
29  case armnn::DataType::QSymmS16:
30  return armnn::DataType::Signed32;
31  default:
32  ARMNN_ASSERT_MSG(false, "GetBiasTypeFromWeightsType(): Unsupported data type.");
33  }
34  return armnn::EmptyOptional();
35 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32

◆ GetCapability() [1/2]

Optional< const BackendOptions::BackendOption > GetCapability ( const std::string &  backendCapabilityName,
const BackendCapabilities &  capabilities 
)

Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not that BackendCapability is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted.

Definition at line 30 of file BackendHelper.cpp.

References BackendOptions::GetOption(), and BackendOptions::GetOptionCount().

Referenced by GetCapability(), HasCapability(), LayerSupportHandle::IsConvolution2dSupported(), LayerSupportHandle::IsDepthwiseConvolutionSupported(), LayerSupportHandle::IsDilatedDepthwiseConvolutionSupported(), LayerSupportHandle::IsFullyConnectedSupported(), and TEST_SUITE().

32 {
33  for (size_t i=0; i < capabilities.GetOptionCount(); i++)
34  {
35  const auto& capability = capabilities.GetOption(i);
36  if (backendCapabilityName == capability.GetName())
37  {
38  return capability;
39  }
40  }
41  return EmptyOptional();
42 }

◆ GetCapability() [2/2]

Optional< const BackendOptions::BackendOption > GetCapability ( const std::string &  backendCapabilityName,
const armnn::BackendId &  backend 
)

Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not that BackendCapability is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted.

Definition at line 44 of file BackendHelper.cpp.

References BackendRegistryInstance(), and GetCapability().

46 {
47  auto const& backendRegistry = armnn::BackendRegistryInstance();
48  if (backendRegistry.IsBackendRegistered(backend))
49  {
50  auto factoryFunc = backendRegistry.GetFactory(backend);
51  auto backendObject = factoryFunc();
52  auto capabilities = backendObject->GetCapabilities();
53  return GetCapability(backendCapabilityName, capabilities);
54  }
55  return EmptyOptional();
56 }
Optional< const BackendOptions::BackendOption > GetCapability(const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...
BackendRegistry & BackendRegistryInstance()
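
A usage sketch (the capability name "NonConstWeights" and its boolean value are assumptions for illustration; not every backend lists it):

    auto capability = armnn::GetCapability("NonConstWeights", armnn::BackendId(armnn::Compute::GpuAcc));
    if (capability.has_value() && capability.value().GetValue().AsBool())
    {
        // The backend reports support for non-constant weights.
    }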

◆ GetClContext()

const armnn::ClContext* armnn::GetClContext ( const void *  buf)
inline

Definition at line 140 of file ClContextSchema_generated.h.

Referenced by ClContextDeserializer::DeserializeFromBinary().

140  {
141  return flatbuffers::GetRoot<armnn::ClContext>(buf);
142 }

◆ GetComparisonOperationAsCString()

constexpr char const* armnn::GetComparisonOperationAsCString ( ComparisonOperation  operation)

Definition at line 57 of file TypesUtils.hpp.

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, and NotEqual.

Referenced by armnnTfLiteParser::ComputeWrappedIndex(), RefComparisonWorkload::ExecuteAsync(), and StringifyLayerParameters< ComparisonDescriptor >::Serialize().

58 {
59  switch (operation)
60  {
61  case ComparisonOperation::Equal: return "Equal";
62  case ComparisonOperation::Greater: return "Greater";
63  case ComparisonOperation::GreaterOrEqual: return "GreaterOrEqual";
64  case ComparisonOperation::Less: return "Less";
65  case ComparisonOperation::LessOrEqual: return "LessOrEqual";
66  case ComparisonOperation::NotEqual: return "NotEqual";
67  default: return "Unknown";
68  }
69 }

◆ GetComputeDeviceAsCString()

constexpr char const* armnn::GetComputeDeviceAsCString ( Compute  compute)

Deprecated function that will be removed together with the Compute enum.

Definition at line 34 of file BackendId.hpp.

References CpuAcc, CpuRef, and GpuAcc.

Referenced by GetSuitableBackendRegistered(), operator<<(), and TEST_SUITE().

35 {
36  switch (compute)
37  {
38  case armnn::Compute::CpuRef: return "CpuRef";
39  case armnn::Compute::CpuAcc: return "CpuAcc";
40  case armnn::Compute::GpuAcc: return "GpuAcc";
41  default: return "Unknown";
42  }
43 }
CPU Execution: Reference C++ kernels.
GPU Execution: OpenCL: ArmCompute.
CPU Execution: NEON: ArmCompute.

◆ GetConvolutionMethodString()

std::string GetConvolutionMethodString ( arm_compute::ConvolutionMethod &  convolutionMethod)
inline

Definition at line 37 of file ClWorkloadUtils.hpp.

38 {
39  switch (convolutionMethod)
40  {
41  case arm_compute::ConvolutionMethod::FFT:
42  return "FFT";
43  case arm_compute::ConvolutionMethod::DIRECT:
44  return "Direct";
45  case arm_compute::ConvolutionMethod::GEMM:
46  return "GEMM";
47  case arm_compute::ConvolutionMethod::WINOGRAD:
48  return "Winograd";
49  default:
50  return "Unknown";
51  }
52 }

◆ GetDataLayoutName()

constexpr const char* armnn::GetDataLayoutName ( DataLayout  dataLayout)

◆ GetDataTypeName()

constexpr const char* armnn::GetDataTypeName ( DataType  dataType)

Definition at line 202 of file TypesUtils.hpp.

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by armnnTfLiteParser::AsFloatArray(), AttemptBackendAssignment(), CompareConstTensor(), ProfilingDetails::DetailsExist(), GetBiasDataType(), TfLiteParserImpl::GetBuffer(), RefTransposeWorkload< DataType >::GetName(), RefPermuteWorkload< DataType >::GetName(), RefDebugWorkload< DataType >::GetName(), armnnUtils::GetPerAxisParams(), TEST_SUITE(), LayerVerifierBase::VerifyConstTensors(), LayerVerifierBase::VerifyNameAndConnections(), and VerifyTensorInfoDataType().

203 {
204  switch (dataType)
205  {
206  case DataType::Float16: return "Float16";
207  case DataType::Float32: return "Float32";
208  case DataType::Signed64: return "Signed64";
209  case DataType::QAsymmU8: return "QAsymmU8";
210  case DataType::QAsymmS8: return "QAsymmS8";
211  case DataType::QSymmS8: return "QSymmS8";
212  case DataType::QSymmS16: return "QSymm16";
213  case DataType::Signed32: return "Signed32";
214  case DataType::Boolean: return "Boolean";
215  case DataType::BFloat16: return "BFloat16";
216 
217  default:
218  return "Unknown";
219  }
220 }

◆ GetDataTypeSize()

constexpr unsigned int armnn::GetDataTypeSize ( DataType  dataType)

Definition at line 151 of file TypesUtils.hpp.

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by MockTensorHandle::CanBeImported(), RefTensorHandle::CanBeImported(), DepthwiseConvolution2dDepthMul64Test(), RefDepthToSpaceWorkload::ExecuteAsync(), RefStridedSliceWorkload::ExecuteAsync(), RefSliceWorkload::ExecuteAsync(), RefShapeWorkload::ExecuteAsync(), IDeserializer::DeserializerImpl::GetNetworkOutputBindingInfo(), TensorInfo::GetNumBytes(), GetUnpaddedTensorStrides(), PermuteTensor(), ConvertConstPermuteLayersToConstLayers::Run(), and TEST_SUITE().

152 {
153  switch (dataType)
154  {
155  case DataType::BFloat16:
156  case DataType::Float16: return 2U;
157  case DataType::Float32:
158  case DataType::Signed32: return 4U;
159  case DataType::Signed64: return 8U;
160  case DataType::QAsymmU8: return 1U;
161  case DataType::QAsymmS8: return 1U;
162  case DataType::QSymmS8: return 1U;
163  case DataType::QSymmS16: return 2U;
164  case DataType::Boolean: return 1U;
165  default: return 0U;
166  }
167 }

◆ GetEventPtr() [1/2]

const Event* armnn::GetEventPtr ( const Event ptr)

Definition at line 109 of file Profiling.cpp.

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults().

109 { return ptr;}

◆ GetEventPtr() [2/2]

const Event* armnn::GetEventPtr ( const std::unique_ptr< Event > &  ptr)

Definition at line 110 of file Profiling.cpp.

110 {return ptr.get(); }

◆ GetGraphForTesting()

Graph & GetGraphForTesting ( IOptimizedNetwork *  optNet)

Definition at line 49 of file TestUtils.cpp.

References IOptimizedNetwork::pOptimizedNetworkImpl.

Referenced by CheckRelatedLayers(), and TEST_SUITE().

50 {
51  return optNet->pOptimizedNetworkImpl->GetGraph();
52 }
std::unique_ptr< OptimizedNetworkImpl > pOptimizedNetworkImpl
Definition: INetwork.hpp:842

◆ GetILayerSupportByBackendId()

LayerSupportHandle GetILayerSupportByBackendId ( const armnn::BackendId &  backend)

Convenience function to retrieve the ILayerSupportHandle for a backend.

Definition at line 16 of file BackendHelper.cpp.

References BackendRegistryInstance(), BackendRegistry::GetFactory(), and BackendRegistry::IsBackendRegistered().

Referenced by TEST_SUITE().

17 {
18  BackendRegistry& backendRegistry = armnn::BackendRegistryInstance();
19 
20  if (!backendRegistry.IsBackendRegistered(backend))
21  {
22  return LayerSupportHandle(nullptr);
23  }
24 
25  auto factoryFunc = backendRegistry.GetFactory(backend);
26  auto backendObject = factoryFunc();
27  return LayerSupportHandle(backendObject->GetLayerSupport(), backend);
28 }
BackendRegistry & BackendRegistryInstance()
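
A usage sketch (the tensor shape and the queried layer type are illustrative assumptions):

    armnn::LayerSupportHandle handle = armnn::GetILayerSupportByBackendId(armnn::Compute::CpuRef);
    armnn::TensorInfo info({ 1, 4 }, armnn::DataType::Float32);
    std::string reason;
    bool supported = handle.IsAdditionSupported(info, info, info, armnn::Optional<std::string&>(reason));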

◆ GetInputTensor()

const armnn::ConstTensor armnn::GetInputTensor ( const LayerBindingId  layerId,
const InputTensors &  inputTensors 
)

Definition at line 1309 of file LoadedNetwork.cpp.

1310 {
1311  for (auto inputTensorPair : inputTensors)
1312  {
1313  LayerBindingId id = inputTensorPair.first;
1314  if (id == layerId)
1315  {
1316  return inputTensorPair.second;
1317  }
1318  }
1319  throw InvalidArgumentException("Input does not exist.");
1320 }
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:290

◆ GetInputTensorData()

const DataType* armnn::GetInputTensorData ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 35 of file RefWorkloadUtils.hpp.

References GetOutputTensorData(), and ITensorHandle::Map().

36 {
37  const ITensorHandle* tensorHandle = data.m_Inputs[idx];
38  return reinterpret_cast<const DataType*>(tensorHandle->Map());
39 }
DataType
Definition: Types.hpp:48

◆ GetInputTensorDataBFloat16()

const BFloat16* armnn::GetInputTensorDataBFloat16 ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 79 of file RefWorkloadUtils.hpp.

80 {
81  return GetInputTensorData<BFloat16>(idx, data);
82 }

◆ GetInputTensorDataFloat()

const float* armnn::GetInputTensorDataFloat ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 55 of file RefWorkloadUtils.hpp.

56 {
57  return GetInputTensorData<float>(idx, data);
58 }

◆ GetInputTensorDataHalf()

const Half* armnn::GetInputTensorDataHalf ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 67 of file RefWorkloadUtils.hpp.

68 {
69  return GetInputTensorData<Half>(idx, data);
70 }

◆ GetLayerTypeAsCString()

char const * GetLayerTypeAsCString ( LayerType  type)

Definition at line 13 of file InternalTypes.cpp.

References ARMNN_ASSERT_MSG, and LIST_OF_LAYER_TYPE.

Referenced by AttemptBackendAssignment(), CheckScaleSetOnQuantizedType(), Connect(), TestInputLayerVisitor::ExecuteStrategy(), TestConvolution2dLayerVisitor::ExecuteStrategy(), StrategyBase< NoThrowStrategy >::ExecuteStrategy(), TestOutputLayerVisitor::ExecuteStrategy(), TestDepthwiseConvolution2dLayerVisitor::ExecuteStrategy(), TestFullyConnectedLayerVistor::ExecuteStrategy(), TestBatchNormalizationLayerVisitor::ExecuteStrategy(), TestConstantLayerVisitor::ExecuteStrategy(), TestLstmLayerVisitor::ExecuteStrategy(), TestQLstmLayerVisitor::ExecuteStrategy(), TestQuantizedLstmLayerVisitor::ExecuteStrategy(), ElementwiseBaseLayer::InferOutputShapes(), Layer::InferOutputShapes(), Graph::InferTensorInfos(), Graph::Print(), ReturnWithError(), Layer::SerializeLayerParameters(), Graph::SerializeToDot(), TEST_SUITE(), ElementwiseBaseLayer::ValidateTensorShapesFromInputs(), ElementwiseUnaryLayer::ValidateTensorShapesFromInputs(), Graph::VerifyConstantLayerSetTensorInfo(), and Layer::VerifyLayerConnections().

14 {
15  switch (type)
16  {
17 #define X(name) case LayerType::name: return #name;
18  LIST_OF_LAYER_TYPE
19 #undef X
20  default:
21  ARMNN_ASSERT_MSG(false, "Unknown layer type");
22  return "Unknown";
23  }
24 }
#define LIST_OF_LAYER_TYPE
This list uses X macro technique.
Definition: Types.hpp:388
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ GetLogicalBinaryOperationAsCString()

constexpr char const* armnn::GetLogicalBinaryOperationAsCString ( LogicalBinaryOperation  operation)

Definition at line 87 of file TypesUtils.hpp.

References LogicalAnd, and LogicalOr.

Referenced by RefLogicalBinaryWorkload::ExecuteAsync().

88 {
89  switch (operation)
90  {
91  case LogicalBinaryOperation::LogicalAnd: return "LogicalAnd";
92  case LogicalBinaryOperation::LogicalOr: return "LogicalOr";
93  default: return "Unknown";
94  }
95 }

◆ GetMemBlockStrategyTypeName()

constexpr const char* armnn::GetMemBlockStrategyTypeName ( MemBlockStrategyType  memBlockStrategyType)

Definition at line 264 of file TypesUtils.hpp.

References MultiAxisPacking, and SingleAxisPacking.

Referenced by RuntimeImpl::RuntimeImpl().

265 {
266  switch (memBlockStrategyType)
267  {
268  case MemBlockStrategyType::SingleAxisPacking: return "SingleAxisPacking";
269  case MemBlockStrategyType::MultiAxisPacking: return "MultiAxisPacking";
270  default: return "Unknown";
271  }
272 }

◆ GetMemoryOptimizerStrategy()

std::unique_ptr<IMemoryOptimizerStrategy> armnn::GetMemoryOptimizerStrategy ( const std::string &  strategyName)

Definition at line 36 of file MemoryOptimizerStrategyLibrary.hpp.

Referenced by main(), RuntimeImpl::RuntimeImpl(), and TEST_SUITE().

37 {
38  const auto& strategyFactoryMap = GetStrategyFactories();
39  auto strategyFactory = strategyFactoryMap.find(strategyName);
40  if (strategyFactory != GetStrategyFactories().end())
41  {
42  return strategyFactory->second->CreateMemoryOptimizerStrategy();
43  }
44  return nullptr;
45 }

◆ GetMemoryOptimizerStrategyNames()

const std::vector<std::string> armnn::GetMemoryOptimizerStrategyNames ( )

Definition at line 47 of file MemoryOptimizerStrategyLibrary.hpp.

Referenced by ParseOptions(), and TEST_SUITE().

48 {
49  const auto& strategyFactoryMap = GetStrategyFactories();
50  std::vector<std::string> strategyNames;
51  for (const auto& strategyFactory : strategyFactoryMap)
52  {
53  strategyNames.emplace_back(strategyFactory.first);
54  }
55  return strategyNames;
56 }
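
A hedged sketch combining the two helpers above. Both live in the internal MemoryOptimizerStrategyLibrary.hpp header, so the include path is indicative only, and "ConstantMemoryStrategy" is used purely as an example name rather than a guaranteed registration:

#include "MemoryOptimizerStrategyLibrary.hpp" // internal backendsCommon header
#include <iostream>
#include <string>

void ListAndPickStrategy()
{
    // Enumerate every strategy registered with the factory map.
    for (const std::string& name : armnn::GetMemoryOptimizerStrategyNames())
    {
        std::cout << "Registered strategy: " << name << "\n";
    }

    // Request one by name; nullptr is returned for unknown names.
    auto strategy = armnn::GetMemoryOptimizerStrategy("ConstantMemoryStrategy");
    if (strategy == nullptr)
    {
        std::cout << "Strategy not registered\n";
    }
}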

◆ GetModelOptionsForTesting()

ModelOptions & GetModelOptionsForTesting ( IOptimizedNetwork optNet)

Definition at line 54 of file TestUtils.cpp.

References IOptimizedNetwork::pOptimizedNetworkImpl.

Referenced by CheckRelatedLayers(), and TEST_SUITE().

55 {
56  return optNet->pOptimizedNetworkImpl->GetModelOptions();
57 }
std::unique_ptr< OptimizedNetworkImpl > pOptimizedNetworkImpl
Definition: INetwork.hpp:842

◆ GetNormalizationAlgorithmChannelAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmChannelAsCString ( NormalizationAlgorithmChannel  channel)

Definition at line 234 of file TypesUtils.hpp.

References Across, and Within.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

235 {
236  switch (channel)
237  {
238  case NormalizationAlgorithmChannel::Across: return "Across";
239  case NormalizationAlgorithmChannel::Within: return "Within";
240  default: return "Unknown";
241  }
242 }

◆ GetNormalizationAlgorithmMethodAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmMethodAsCString ( NormalizationAlgorithmMethod  method)

Definition at line 244 of file TypesUtils.hpp.

References LocalBrightness, and LocalContrast.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

245 {
246  switch (method)
247  {
248  case NormalizationAlgorithmMethod::LocalBrightness: return "LocalBrightness";
249  case NormalizationAlgorithmMethod::LocalContrast: return "LocalContrast";
250  default: return "Unknown";
251  }
252 }

◆ GetNumActivations()

unsigned int armnn::GetNumActivations ( const TensorInfo inputInfo)

Definition at line 16 of file RefFullyConnectedWorkload.cpp.

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

17 {
18  unsigned int numActivations = 1; // Total number of activations in the input.
19  for (unsigned int i = 1; i < inputInfo.GetNumDimensions(); i++)
20  {
21  numActivations *= inputInfo.GetShape()[i];
22  }
23  return numActivations;
24 }

◆ GetNumberOfCacheFiles()

unsigned int GetNumberOfCacheFiles ( const armnn::BackendId backend)

Returns the number of cached files if the backend supports caching.

Definition at line 129 of file BackendHelper.cpp.

References BackendRegistryInstance().

130 {
131  auto const& backendRegistry = armnn::BackendRegistryInstance();
132  if (backendRegistry.IsBackendRegistered(backend))
133  {
134  auto factoryFunc = backendRegistry.GetFactory(backend);
135  auto backendObject = factoryFunc();
136  return backendObject->GetNumberOfCacheFiles();
137  }
138  return 0;
139 }
BackendRegistry & BackendRegistryInstance()
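
A minimal sketch of querying cache-file support before enabling model caching. The GpuAcc backend id is an illustrative choice; the call returns 0 for backends that are not registered or do not support caching:

#include <armnn/BackendHelper.hpp>
#include <armnn/BackendId.hpp>
#include <iostream>

void QueryCacheFileSupport()
{
    unsigned int numFiles = armnn::GetNumberOfCacheFiles(armnn::BackendId("GpuAcc"));
    std::cout << "Backend supports " << numFiles << " cache file(s)\n";
}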

◆ GetNumInputs()

◆ GetOffset()

unsigned int armnn::GetOffset ( const TensorShape shape,
unsigned int  b,
unsigned int  h,
unsigned int  w,
unsigned int  c,
const DataLayoutIndexed dataLayout 
)

Definition at line 15 of file SpaceToBatchNd.cpp.

References DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), and NHWC.

Referenced by SpaceToBatchNd(), and SpaceToDepth().

21 {
22  if (dataLayout.GetDataLayout() == DataLayout::NHWC)
23  {
24  return ((b * shape[dataLayout.GetHeightIndex()] + h) * shape[dataLayout.GetWidthIndex()] + w) *
25  shape[dataLayout.GetChannelsIndex()] + c;
26  }
27  else
28  {
29  return ((b * shape[dataLayout.GetChannelsIndex()] + c) * shape[dataLayout.GetHeightIndex()] + h) *
30  shape[dataLayout.GetWidthIndex()] + w;
31  }
32 }
unsigned int GetWidthIndex() const
unsigned int GetHeightIndex() const
armnn::DataLayout GetDataLayout() const
unsigned int GetChannelsIndex() const
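
As a worked example of the NHWC branch above: for a tensor of shape { N=1, H=2, W=3, C=4 }, the element at (b=0, h=1, w=2, c=3) maps to offset ((0*2 + 1)*3 + 2)*4 + 3 = 23, i.e. the last element of the buffer. The non-NHWC branch applies the same scheme in N, C, H, W order.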

◆ GetOutputShapeRoundingAsCString()

constexpr char const* armnn::GetOutputShapeRoundingAsCString ( OutputShapeRounding  rounding)

Definition at line 108 of file TypesUtils.hpp.

References Ceiling, and Floor.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

109 {
110  switch (rounding)
111  {
112  case OutputShapeRounding::Ceiling: return "Ceiling";
113  case OutputShapeRounding::Floor: return "Floor";
114  default: return "Unknown";
115  }
116 }

◆ GetOutputTensor()

const armnn::Tensor armnn::GetOutputTensor ( const LayerBindingId  layerId,
const OutputTensors outputTensors 
)

Definition at line 1322 of file LoadedNetwork.cpp.

1323 {
1324  for (auto outputTensorPair : outputTensors)
1325  {
1326  LayerBindingId id = outputTensorPair.first;
1327  if (id == layerId)
1328  {
1329  return outputTensorPair.second;
1330  }
1331  }
1332  throw InvalidArgumentException("Output does not exist.");
1333 }
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:290

◆ GetOutputTensorData() [1/2]

DataType* armnn::GetOutputTensorData ( ITensorHandle tensorHandle)

Definition at line 49 of file RefWorkloadUtils.hpp.

References ITensorHandle::Map().

50 {
51  return reinterpret_cast<DataType*>(tensorHandle->Map());
52 }
DataType
Definition: Types.hpp:48

◆ GetOutputTensorData() [2/2]

DataType * GetOutputTensorData ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 168 of file ClWorkloadUtils.hpp.

References ITensorHandle::Map().

Referenced by GetInputTensorData(), and SetNeonSliceData().

169 {
170  ITensorHandle* tensorHandle = data.m_Outputs[idx];
171  return reinterpret_cast<DataType*>(tensorHandle->Map());
172 }
DataType
Definition: Types.hpp:48

◆ GetOutputTensorDataBFloat16()

BFloat16* armnn::GetOutputTensorDataBFloat16 ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 85 of file RefWorkloadUtils.hpp.

86 {
87  return GetOutputTensorData<BFloat16>(idx, data);
88 }

◆ GetOutputTensorDataFloat()

float* armnn::GetOutputTensorDataFloat ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 61 of file RefWorkloadUtils.hpp.

62 {
63  return GetOutputTensorData<float>(idx, data);
64 }

◆ GetOutputTensorDataHalf()

Half* armnn::GetOutputTensorDataHalf ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 73 of file RefWorkloadUtils.hpp.

74 {
75  return GetOutputTensorData<Half>(idx, data);
76 }

◆ GetPaddingMethodAsCString()

constexpr char const* armnn::GetPaddingMethodAsCString ( PaddingMethod  method)

Definition at line 118 of file TypesUtils.hpp.

References Exclude, and IgnoreValue.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

119 {
120  switch (method)
121  {
122  case PaddingMethod::Exclude: return "Exclude";
123  case PaddingMethod::IgnoreValue: return "IgnoreValue";
124  default: return "Unknown";
125  }
126 }

◆ GetPaddingModeAsCString()

constexpr char const* armnn::GetPaddingModeAsCString ( PaddingMode  mode)

Definition at line 128 of file TypesUtils.hpp.

References Constant, Reflect, and Symmetric.

Referenced by StringifyLayerParameters< PadDescriptor >::Serialize().

129 {
130  switch (mode)
131  {
132  case PaddingMode::Constant: return "Constant";
133  case PaddingMode::Symmetric: return "Symmetric";
134  case PaddingMode::Reflect: return "Reflect";
135  default: return "Unknown";
136  }
137 }

◆ GetPoolingAlgorithmAsCString()

constexpr char const* armnn::GetPoolingAlgorithmAsCString ( PoolingAlgorithm  pooling)

Definition at line 97 of file TypesUtils.hpp.

References Average, L2, and Max.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

98 {
99  switch (pooling)
100  {
101  case PoolingAlgorithm::Average: return "Average";
102  case PoolingAlgorithm::Max: return "Max";
103  case PoolingAlgorithm::L2: return "L2";
104  default: return "Unknown";
105  }
106 }

◆ GetProfilerEventSequenceSize()

size_t armnn::GetProfilerEventSequenceSize ( armnn::IProfiler profiler)

Definition at line 19 of file ProfilerTests.cpp.

References ProfilerManager::GetInstance(), ProfilerManager::GetProfiler(), and ProfilerManager::RegisterProfiler().

Referenced by TEST_SUITE().

20 {
21  if (!profiler)
22  {
23  return static_cast<size_t>(-1);
24  }
25 
26  return profiler->pProfilerImpl->m_EventSequence.size();
27 }

◆ GetProfilingService()

arm::pipe::IProfilingService & GetProfilingService ( armnn::RuntimeImpl runtime)

Definition at line 59 of file TestUtils.cpp.

Referenced by CheckRelatedLayers(), TEST_SUITE(), and VerifyPostOptimisationStructureTestImpl().

60 {
61  return *(runtime->m_ProfilingService.get());
62 }

◆ GetReduceOperationAsCString()

constexpr char const* armnn::GetReduceOperationAsCString ( ReduceOperation  reduce_operation)

Definition at line 139 of file TypesUtils.hpp.

References Max, Mean, Min, Prod, and Sum.

Referenced by StringifyLayerParameters< ReduceDescriptor >::Serialize().

140 {
141  switch (reduce_operation)
142  {
143  case ReduceOperation::Sum: return "Sum";
144  case ReduceOperation::Max: return "Max";
145  case ReduceOperation::Mean: return "Mean";
146  case ReduceOperation::Min: return "Min";
147  case ReduceOperation::Prod: return "Prod";
148  default: return "Unknown";
149  }
150 }

◆ GetResizeMethodAsCString()

constexpr const char* armnn::GetResizeMethodAsCString ( ResizeMethod  method)

Definition at line 254 of file TypesUtils.hpp.

References Bilinear, and NearestNeighbor.

Referenced by StringifyLayerParameters< ResizeDescriptor >::Serialize().

255 {
256  switch (method)
257  {
258  case ResizeMethod::Bilinear: return "Bilinear";
259  case ResizeMethod::NearestNeighbor: return "NearestNeighbour";
260  default: return "Unknown";
261  }
262 }

◆ GetSizePrefixedClContext()

const armnn::ClContext* armnn::GetSizePrefixedClContext ( const void *  buf)
inline

Definition at line 144 of file ClContextSchema_generated.h.

144  {
145  return flatbuffers::GetSizePrefixedRoot<armnn::ClContext>(buf);
146 }

◆ GetStatusAsCString()

constexpr char const* armnn::GetStatusAsCString ( Status  status)

Definition at line 17 of file TypesUtils.hpp.

References Failure, and Success.

Referenced by operator<<().

18 {
19  switch (status)
20  {
21  case armnn::Status::Success: return "Status::Success";
22  case armnn::Status::Failure: return "Status::Failure";
23  default: return "Unknown";
24  }
25 }

◆ GetTensorInfo()

const TensorInfo& armnn::GetTensorInfo ( const ITensorHandle tensorHandle)
inline

float32 helpers

Definition at line 26 of file RefWorkloadUtils.hpp.

References RefTensorHandle::GetTensorInfo().

Referenced by BatchNormImpl(), Concatenate(), RefGatherNdWorkload::ExecuteAsync(), RefStridedSliceWorkload::ExecuteAsync(), RefDepthToSpaceWorkload::ExecuteAsync(), RefFakeQuantizationFloat32Workload::ExecuteAsync(), RefFillWorkload::ExecuteAsync(), RefChannelShuffleWorkload::ExecuteAsync(), RefSpaceToDepthWorkload::ExecuteAsync(), RefFloorWorkload::ExecuteAsync(), RefConvertBf16ToFp32Workload::ExecuteAsync(), RefConvertFp16ToFp32Workload::ExecuteAsync(), RefLogSoftmaxWorkload::ExecuteAsync(), RefConvertFp32ToBf16Workload::ExecuteAsync(), RefConvertFp32ToFp16Workload::ExecuteAsync(), RefPadWorkload::ExecuteAsync(), RefActivationWorkload::ExecuteAsync(), RefReshapeWorkload::ExecuteAsync(), RefResizeWorkload::ExecuteAsync(), RefSoftmaxWorkload::ExecuteAsync(), RefSpaceToBatchNdWorkload::ExecuteAsync(), RefDepthwiseConvolution2dWorkload::ExecuteAsync(), RefStackWorkload::ExecuteAsync(), RefInstanceNormalizationWorkload::ExecuteAsync(), RefSliceWorkload::ExecuteAsync(), RefDetectionPostProcessWorkload::ExecuteAsync(), RefDequantizeWorkload::ExecuteAsync(), RefArgMinMaxWorkload::ExecuteAsync(), RefPreluWorkload::ExecuteAsync(), RefQuantizeWorkload::ExecuteAsync(), RefBatchNormalizationWorkload::ExecuteAsync(), RefBatchToSpaceNdWorkload::ExecuteAsync(), RefCastWorkload::ExecuteAsync(), RefL2NormalizationWorkload::ExecuteAsync(), RefNormalizationWorkload::ExecuteAsync(), RefReduceWorkload::ExecuteAsync(), RefLstmWorkload::ExecuteAsync(), RefMeanWorkload::ExecuteAsync(), RefPooling2dWorkload::ExecuteAsync(), RefQLstmWorkload::ExecuteAsync(), RefPooling3dWorkload::ExecuteAsync(), RefConvolution2dWorkload::ExecuteAsync(), RefElementwiseUnaryWorkload::ExecuteAsync(), RefConstantWorkload::ExecuteAsync(), RefLogicalBinaryWorkload::ExecuteAsync(), RefLogicalUnaryWorkload::ExecuteAsync(), RefConvolution3dWorkload::ExecuteAsync(), RefComparisonWorkload::ExecuteAsync(), RefGatherWorkload::ExecuteAsync(), RefShapeWorkload::ExecuteAsync(), RefTransposeConvolution2dWorkload::ExecuteAsync(), RefFullyConnectedWorkload::ExecuteAsync(), RefRankWorkload::ExecuteAsync(), RefUnidirectionalSequenceLstmWorkload::ExecuteAsync(), RefPermuteWorkload< DataType >::ExecuteAsync(), RefTransposeWorkload< DataType >::ExecuteAsync(), RefElementwiseWorkload< Functor, ParentDescriptor, DebugString >::ExecuteAsync(), RefDebugWorkload< DataType >::ExecuteAsync(), OutputSlot::GetNumConnections(), OutputSlot::MoveAllConnections(), RefComparisonWorkload::PostAllocationConfigure(), Split(), Splitter(), SwitchLayer::ValidateTensorShapesFromInputs(), DetectionPostProcessLayer::ValidateTensorShapesFromInputs(), SplitterLayer::ValidateTensorShapesFromInputs(), LstmLayer::ValidateTensorShapesFromInputs(), ConcatLayer::ValidateTensorShapesFromInputs(), QuantizedLstmLayer::ValidateTensorShapesFromInputs(), and QLstmLayer::ValidateTensorShapesFromInputs().

27 {
28  // We know that reference workloads use RefTensorHandles for inputs and outputs
29  const RefTensorHandle* refTensorHandle =
30  PolymorphicDowncast<const RefTensorHandle*>(tensorHandle);
31  return refTensorHandle->GetTensorInfo();
32 }

◆ GetTimeDuration()

std::chrono::duration<double, std::milli> armnn::GetTimeDuration ( std::chrono::high_resolution_clock::time_point  start_time)
inline

Definition at line 19 of file Timer.hpp.

References GetTimeNow().

Referenced by CheckInferenceTimeThreshold(), RuntimeImpl::EnqueueWorkload(), RuntimeImpl::Execute(), InferenceModel< IParser, TDataType >::InferenceModel(), MainImpl(), InferenceModel< IParser, TDataType >::Run(), InferenceModel< IParser, TDataType >::RunAsync(), RuntimeImpl::RuntimeImpl(), and RuntimeImpl::~RuntimeImpl().

21 {
22  return std::chrono::duration<double, std::milli>(GetTimeNow() - start_time);
23 }
std::chrono::high_resolution_clock::time_point GetTimeNow()
Definition: Timer.hpp:14
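
A minimal timing sketch pairing GetTimeNow() with GetTimeDuration() to measure wall-clock time in milliseconds. Timer.hpp is an internal src/armnn header, so the include shown is indicative:

#include "Timer.hpp" // declares armnn::GetTimeNow and armnn::GetTimeDuration
#include <iostream>

void TimeSomething()
{
    const auto startTime = armnn::GetTimeNow();

    // ... work to be measured ...

    std::cout << "Took " << armnn::GetTimeDuration(startTime).count() << " ms\n";
}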

◆ GetTimeNow()

std::chrono::high_resolution_clock::time_point armnn::GetTimeNow ( )
inline

Definition at line 14 of file Timer.hpp.

◆ GetUnaryOperationAsCString()

constexpr char const* armnn::GetUnaryOperationAsCString ( UnaryOperation  operation)

Definition at line 71 of file TypesUtils.hpp.

References Abs, Exp, Log, LogicalNot, Neg, Rsqrt, Sin, and Sqrt.

Referenced by armnnTfLiteParser::ComputeWrappedIndex(), RefLogicalUnaryWorkload::ExecuteAsync(), RefElementwiseUnaryWorkload::ExecuteAsync(), StringifyLayerParameters< ElementwiseUnaryDescriptor >::Serialize(), and TEST_SUITE().

72 {
73  switch (operation)
74  {
75  case UnaryOperation::Abs: return "Abs";
76  case UnaryOperation::Exp: return "Exp";
77  case UnaryOperation::Sqrt: return "Sqrt";
78  case UnaryOperation::Rsqrt: return "Rsqrt";
79  case UnaryOperation::Neg: return "Neg";
80  case UnaryOperation::Log: return "Log";
81  case UnaryOperation::LogicalNot: return "LogicalNot";
82  case UnaryOperation::Sin: return "Sin";
83  default: return "Unknown";
84  }
85 }

◆ GetUnpaddedTensorStrides()

TensorShape GetUnpaddedTensorStrides ( const TensorInfo tensorInfo)

Definition at line 15 of file TensorHandle.cpp.

References TensorInfo::GetDataType(), GetDataTypeSize(), and TensorInfo::GetShape().

Referenced by MockTensorHandle::GetStrides(), SampleTensorHandle::GetStrides(), RefTensorHandle::GetStrides(), and ConstTensorHandle::GetStrides().

16 {
17  TensorShape shape(tensorInfo.GetShape());
18  auto size = GetDataTypeSize(tensorInfo.GetDataType());
19  auto runningSize = size;
20  std::vector<unsigned int> strides(shape.GetNumDimensions());
21  auto lastIdx = shape.GetNumDimensions()-1;
22  for (unsigned int i=0; i < lastIdx ; i++)
23  {
24  strides[lastIdx-i] = runningSize;
25  runningSize *= shape[lastIdx-i];
26  }
27  strides[0] = runningSize;
28  return TensorShape(shape.GetNumDimensions(), strides.data());
29 }
constexpr unsigned int GetDataTypeSize(DataType dataType)
Definition: TypesUtils.hpp:151
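
As a worked example: for a Float32 tensor of shape { 2, 3, 4 } the element size is 4 bytes, so the loop above produces byte strides { 48, 16, 4 }. Moving one step along the innermost dimension advances 4 bytes, one row advances 16 bytes, and one step along the outermost dimension advances 48 bytes.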

◆ GetVersion()

const std::string GetVersion ( )

Definition at line 77 of file Utils.cpp.

References ARMNN_VERSION.

78 {
79  return ARMNN_VERSION;
80 }
#define ARMNN_VERSION
ARMNN_VERSION: "X.Y.Z" where: X = Major version number Y = Minor version number Z = Patch version num...
Definition: Version.hpp:22

◆ HasCapability() [1/4]

bool HasCapability ( const std::string &  name,
const BackendCapabilities capabilities 
)

Convenience function to check if a capability exists in a BackendCapabilities struct.

Definition at line 58 of file BackendHelper.cpp.

References GetCapability().

Referenced by HasCapability(), LoadedNetwork::ImportInputs(), LoadedNetwork::ImportOutputs(), LoadedNetwork::MakeLoadedNetwork(), RuntimeImpl::RuntimeImpl(), and TEST_SUITE().

59 {
60  return GetCapability(name, capabilities).has_value();
61 }
Optional< const BackendOptions::BackendOption > GetCapability(const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...

◆ HasCapability() [2/4]

bool HasCapability ( const std::string &  name,
const armnn::BackendId backend 
)

Convenience function to check if a capability exists in a backend.

Definition at line 63 of file BackendHelper.cpp.

References GetCapability().

64 {
65  return GetCapability(name, backend).has_value();
66 }
Optional< const BackendOptions::BackendOption > GetCapability(const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...

◆ HasCapability() [3/4]

bool HasCapability ( const BackendOptions::BackendOption capability,
const BackendCapabilities capabilities 
)

Convenience function to check if a given capability matches a capability in a BackendCapabilities struct.

Definition at line 68 of file BackendHelper.cpp.

References BackendOptions::Var::AsBool(), BackendOptions::Var::AsFloat(), BackendOptions::Var::AsInt(), BackendOptions::Var::AsString(), BackendOptions::Var::AsUnsignedInt(), BackendOptions::BackendOption::GetName(), BackendOptions::GetOption(), BackendOptions::GetOptionCount(), BackendOptions::BackendOption::GetValue(), BackendOptions::Var::IsBool(), BackendOptions::Var::IsFloat(), BackendOptions::Var::IsInt(), BackendOptions::Var::IsString(), and BackendOptions::Var::IsUnsignedInt().

69 {
70  for (size_t i=0; i < capabilities.GetOptionCount(); i++)
71  {
72  const auto& backendCapability = capabilities.GetOption(i);
73  if (capability.GetName() == backendCapability.GetName())
74  {
75  if (capability.GetValue().IsBool() && backendCapability.GetValue().IsBool())
76  {
77  return capability.GetValue().AsBool() == backendCapability.GetValue().AsBool();
78  }
79  else if(capability.GetValue().IsFloat() && backendCapability.GetValue().IsFloat())
80  {
81  return capability.GetValue().AsFloat() == backendCapability.GetValue().AsFloat();
82  }
83  else if(capability.GetValue().IsInt() && backendCapability.GetValue().IsInt())
84  {
85  return capability.GetValue().AsInt() == backendCapability.GetValue().AsInt();
86  }
87  else if(capability.GetValue().IsString() && backendCapability.GetValue().IsString())
88  {
89  return capability.GetValue().AsString() == backendCapability.GetValue().AsString();
90  }
91  else if(capability.GetValue().IsUnsignedInt() && backendCapability.GetValue().IsUnsignedInt())
92  {
93  return capability.GetValue().AsUnsignedInt() == backendCapability.GetValue().AsUnsignedInt();
94  }
95  }
96  }
97  return false;
98 }

◆ HasCapability() [4/4]

bool HasCapability ( const BackendOptions::BackendOption backendOption,
const armnn::BackendId backend 
)

Convenience function to check if a given capability matches a capability in a backend.

Definition at line 100 of file BackendHelper.cpp.

References BackendRegistryInstance(), and HasCapability().

101 {
102  auto const& backendRegistry = armnn::BackendRegistryInstance();
103  if (backendRegistry.IsBackendRegistered(backend))
104  {
105  auto factoryFunc = backendRegistry.GetFactory(backend);
106  auto backendObject = factoryFunc();
107  auto capabilities = backendObject->GetCapabilities();
108  return HasCapability(backendOption, capabilities);
109  }
110  return false;
111 }
bool HasCapability(const std::string &name, const BackendCapabilities &capabilities)
Convenience function to check if a capability exists in a BackendCapabilities struct.
BackendRegistry & BackendRegistryInstance()
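
A hedged sketch of the BackendOption-based overload above. The capability name "AsyncExecution" and the CpuRef backend id are illustrative values, not a statement of what any particular backend reports:

#include <armnn/BackendHelper.hpp>
#include <armnn/BackendOptions.hpp>
#include <iostream>

void CheckAsyncExecution()
{
    armnn::BackendOptions::BackendOption wanted{"AsyncExecution", true};
    if (armnn::HasCapability(wanted, armnn::BackendId("CpuRef")))
    {
        std::cout << "CpuRef reports AsyncExecution == true\n";
    }
}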

◆ IgnoreUnused()

void armnn::IgnoreUnused ( Ts &&  ...)
inline

Definition at line 14 of file IgnoreUnused.hpp.

Referenced by ChannelShuffleLayer::Accept(), ConvertFp32ToFp16Layer::Accept(), MapLayer::Accept(), MemCopyLayer::Accept(), MemImportLayer::Accept(), ConvertBf16ToFp32Layer::Accept(), ConvertFp16ToFp32Layer::Accept(), ConvertFp32ToBf16Layer::Accept(), CastLayer::Accept(), DebugLayer::Accept(), UnmapLayer::Accept(), FakeQuantizationLayer::Accept(), GatherNdLayer::Accept(), PreCompiledLayer::Accept(), ShapeLayer::Accept(), Convolution3dLayer::Accept(), UnidirectionalSequenceLstmLayer::Accept(), IInferenceTestCaseProvider::AddCommandLineOptions(), AdditionAfterMaxPoolTest(), AdditionBroadcast1ElementTestImpl(), AdditionBroadcastTestImpl(), ClBackendDefaultAllocator::allocate(), DefaultAllocator::allocate(), ArgMinMax(), BoundedReLuTestCommon(), BoundedReLuUint8UpperAndLowerBoundTest(), CalculateSlotOptionForOutput(), ITensorHandle::CanBeImported(), ClTensorHandle::CanBeImported(), CastTest(), ParserFlatbuffersSerializeFixture::CheckTensors(), ClassifierTestCase< TTestCaseDatabase, TModel >::ClassifierTestCase(), ClContextControl::ClContextControl(), ClConvolution3dWorkload::ClConvolution3dWorkload(), SpaceToBatchNdLayer::Clone(), SpaceToDepthLayer::Clone(), DynamicBackendUtils::CloseHandle(), ClUnidirectionalSequenceLstmFloatWorkloadValidate(), CompareActivationTestImpl(), CompareAdditionTest(), CompareBatchNormTest(), CompareMultiplicationTest(), CompareVector(), ConcatDifferentInputOutputQParamTest(), ConcatTestImpl(), ConcatUint16Test(), ConcatUint8DifferentQParamsTest(), ConcatUint8Test(), ConstantLinearActivationTestCommon(), ConvertBf16ToFp32Test(), ConvertFp32ToBf16Test(), Convolution2d3x3Stride2x2BFloat16SmallValueTest(), Convolution2d3x3Stride2x2BFloat16Test(), CopyTensorContentsGeneric(), MockBackend::CreateBackendProfilingContext(), RefTensorHandleFactory::CreateSubTensorHandle(), SampleDynamicTensorHandleFactory::CreateSubTensorHandle(), SampleDynamicWorkloadFactory::CreateSubTensorHandle(), RefWorkloadFactory::CreateSubTensorHandle(), RefTensorHandleFactory::CreateTensorHandle(), SampleDynamicTensorHandleFactory::CreateTensorHandle(), MockTensorHandleFactory::CreateTensorHandle(), ClWorkloadFactory::CreateTensorHandle(), ITensorHandleFactory::CreateTensorHandle(), RefWorkloadFactory::CreateTensorHandle(), MockWorkloadFactory::CreateTensorHandle(), OutputLayer::CreateTensorHandles(), InputLayer::CreateWorkload(), MemCopyLayer::CreateWorkload(), MemImportLayer::CreateWorkload(), MergeLayer::CreateWorkload(), OutputLayer::CreateWorkload(), UnmapLayer::CreateWorkload(), MapLayer::CreateWorkload(), StandInLayer::CreateWorkload(), IBackendInternal::CreateWorkloadFactory(), QASymm8Decoder::DecodeTensor(), QASymmS8Decoder::DecodeTensor(), QSymmS8Decoder::DecodeTensor(), QSymm16Decoder::DecodeTensor(), BFloat16Decoder::DecodeTensor(), Float16Decoder::DecodeTensor(), Float32Decoder::DecodeTensor(), ScaledInt32Decoder::DecodeTensor(), Int32Decoder::DecodeTensor(), Int32ToInt32tDecoder::DecodeTensor(), BooleanDecoder::DecodeTensor(), BooleanDecoderBool::DecodeTensor(), QSymm8PerAxisDecoder::DecodeTensor(), Dequantize(), SelectiveQuantizer< T, false >::Dequantize(), SelectiveQuantizer< armnn::Half, false >::Dequantize(), SelectiveQuantizer< armnn::BFloat16, false >::Dequantize(), DetectionPostProcess(), DivisionByZeroTest(), ProfilerImpl::EndEvent(), RefStridedSliceWorkload::ExecuteAsync(), SerializerStrategy::ExecuteStrategy(), TestInputLayerVisitor::ExecuteStrategy(), TestConvolution2dLayerVisitor::ExecuteStrategy(), LayerVerifierBase::ExecuteStrategy(), StrategyBase< NoThrowStrategy 
>::ExecuteStrategy(), MemCopyLayer::ExecuteStrategy(), MemImportLayer::ExecuteStrategy(), FakeQuantizationLayer::ExecuteStrategy(), PreCompiledLayer::ExecuteStrategy(), LayerVerifierBaseWithDescriptor< Descriptor >::ExecuteStrategy(), TestOutputLayerVisitor::ExecuteStrategy(), TestDepthwiseConvolution2dLayerVisitor::ExecuteStrategy(), TestFullyConnectedLayerVistor::ExecuteStrategy(), LayerVerifierBaseWithDescriptorAndConstants< Descriptor >::ExecuteStrategy(), TestBatchNormalizationLayerVisitor::ExecuteStrategy(), TestConstantLayerVisitor::ExecuteStrategy(), TestLstmLayerVisitor::ExecuteStrategy(), TestQLstmLayerVisitor::ExecuteStrategy(), TestQuantizedLstmLayerVisitor::ExecuteStrategy(), ExecutionFrame::ExecuteWorkloads(), exit_capture(), FakeQuantizationTest(), FalseFunc(), FalseFuncF16(), FalseFuncF32(), FalseFuncI32(), FalseFuncU8(), FalseInputFuncF16(), FalseInputFuncF32(), FalseOutputFuncF16(), FalseOutputFuncF32(), Gather(), ClImportTensorHandleFactory::GetCapabilities(), NeonTensorHandleFactory::GetCapabilities(), ITensorHandleFactory::GetCapabilities(), MockCounterDirectory::GetCounter(), MockCounterDirectory::GetCounterSet(), MockCounterDirectory::GetDevice(), DynamicBackendUtils::GetEntryPoint(), armnnSerializer::GetFlatBufferArgMinMaxFunction(), GetImageDataInArmNnLayoutAsNormalizedFloats(), DefaultAllocator::GetMemoryRegionAtOffset(), ClBackendDefaultAllocator::GetMemoryRegionAtOffset(), ICustomAllocator::GetMemoryRegionAtOffset(), IDeserializer::DeserializerImpl::GetNetworkInputBindingInfo(), IDeserializer::DeserializerImpl::GetNetworkOutputBindingInfo(), IDeserializer::DeserializerImpl::GetNormalizationDescriptor(), LoadedNetwork::GetOutputTensorInfo(), IDeserializer::DeserializerImpl::GetPooling2dDescriptor(), IDeserializer::DeserializerImpl::GetPooling3dDescriptor(), MockProfilingConnectionFactory::GetProfilingConnection(), DynamicBackendUtils::GetSharedObjects(), ITensorHandle::Import(), ClTensorHandle::Import(), ShapeLayer::InferOutputShapes(), SliceLayer::InferOutputShapes(), StackLayer::InferOutputShapes(), StandInLayer::InferOutputShapes(), ReshapeLayer::InferOutputShapes(), SplitterLayer::InferOutputShapes(), NeonLayerSupport::IsActivationSupported(), MockImportLayerSupport::IsAdditionSupported(), RefLayerSupport::IsArgMinMaxSupported(), RefLayerSupport::IsBatchNormalizationSupported(), RefLayerSupport::IsBatchToSpaceNdSupported(), RefLayerSupport::IsChannelShuffleSupported(), RefLayerSupport::IsComparisonSupported(), RefLayerSupport::IsConcatSupported(), NeonLayerSupport::IsConvertBf16ToFp32Supported(), NeonLayerSupport::IsConvertFp16ToFp32Supported(), NeonLayerSupport::IsConvertFp32ToBf16Supported(), NeonLayerSupport::IsConvertFp32ToFp16Supported(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDepthToSpaceSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), RefLayerSupport::IsDetectionPostProcessSupported(), RefLayerSupport::IsElementwiseUnarySupported(), RefLayerSupport::IsFakeQuantizationSupported(), ClLayerSupport::IsFillSupported(), NeonLayerSupport::IsFillSupported(), RefLayerSupport::IsFillSupported(), NeonLayerSupport::IsFloorSupported(), RefLayerSupport::IsFloorSupported(), MockImportLayerSupport::IsInputSupported(), RefLayerSupport::IsInstanceNormalizationSupported(), RefLayerSupport::IsL2NormalizationSupported(), ILayerSupport::IsLayerSupported(), ClLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogSoftmaxSupported(), 
RefLayerSupport::IsLstmSupported(), RefLayerSupport::IsNormalizationSupported(), MockImportLayerSupport::IsOutputSupported(), RefLayerSupport::IsPadSupported(), RefLayerSupport::IsPermuteSupported(), RefLayerSupport::IsPooling2dSupported(), RefLayerSupport::IsPooling3dSupported(), RefLayerSupport::IsQLstmSupported(), RefLayerSupport::IsRankSupported(), RefLayerSupport::IsReduceSupported(), ClLayerSupport::IsReshapeSupported(), NeonLayerSupport::IsReshapeSupported(), RefLayerSupport::IsReshapeSupported(), RefLayerSupport::IsResizeSupported(), RefLayerSupport::IsShapeSupported(), RefLayerSupport::IsSliceSupported(), RefLayerSupport::IsSoftmaxSupported(), RefLayerSupport::IsSpaceToBatchNdSupported(), RefLayerSupport::IsSpaceToDepthSupported(), ClLayerSupport::IsSplitterSupported(), NeonLayerSupport::IsSplitterSupported(), RefLayerSupport::IsSplitterSupported(), RefLayerSupport::IsStackSupported(), RefLayerSupport::IsStridedSliceSupported(), RefLayerSupport::IsTransposeConvolution2dSupported(), RefLayerSupport::IsTransposeSupported(), RefLayerSupport::IsUnidirectionalSequenceLstmSupported(), Layer::Layer(), LogSoftmax(), ClImportTensorHandle::Map(), ClBackend::ClBackendCustomAllocatorMemoryRegion::map(), ClImportSubTensorHandle::Map(), MaximumSimpleTest(), MinimumBroadcast1ElementTest1(), MirrorPad2dTestCommon(), MirrorPad3dTestCommon(), MirrorPad4dTestCommon(), NeonConvolution3dWorkload::NeonConvolution3dWorkload(), DynamicBackendUtils::OpenHandle(), StubCommandHandler::operator()(), TestFunctorA::operator()(), TfLiteParserImpl::OutputShapeOfSqueeze(), Pad2dTestCommon(), Pad3dTestCommon(), Pad4dTestCommon(), PadQAsymmTestCommon(), PermuteInputsForConcat(), PermuteTensorData(), PreluTest(), IInferenceTestCaseProvider::ProcessCommandLineOptions(), YoloTestCase< Model >::ProcessResult(), SelectiveQuantizer< T, false >::Quantize(), SelectiveQuantizer< armnn::Half, false >::Quantize(), SelectiveQuantizer< armnn::BFloat16, false >::Quantize(), RankTest(), TestProfilingConnectionArmnnError::ReadPacket(), TestProfilingConnectionBadAckPacket::ReadPacket(), MockProfilingConnection::ReadPacket(), MockCounterDirectory::RegisterCounter(), BaseWorkload< Convolution2dQueueDescriptor >::ReplaceInputTensorHandle(), BaseWorkload< Convolution2dQueueDescriptor >::ReplaceOutputTensorHandle(), ConvertConstDequantisationLayersToConstLayersImpl::Run(), ConvertConstPermuteLayersToConstLayers::Run(), OptimizeInverseConversionsImpl::Run(), RedirectMembersToConstantInputsImpl::Run(), OptimizeInversePermutesImpl< PermuteType >::Run(), SquashEqualSiblingsImpl< Comparable >::Run(), FuseBatchNorm< ConvLayer, ArmnnType, T >::Run(), ConvertConstants< Converter, Predicate >::Run(), MockSendCounterPacket::SendCounterDirectoryPacket(), MockSendCounterPacket::SendPeriodicCounterCapturePacket(), MockSendCounterPacket::SendPeriodicCounterSelectionPacket(), SetLogFilter(), ClImportTensorHandle::SetMemoryGroup(), ClImportSubTensorHandle::SetMemoryGroup(), ShapeTest(), SimpleActivationTest(), SimpleConvertFp16ToFp32Test(), SimpleConvertFp32ToFp16Test(), SimpleConvolution2d3x3NhwcTestCommon(), SimpleConvolution2d3x3Stride2x2TestCommon(), SimpleConvolution2dNhwcTestImpl(), SimpleConvolution2dTestImpl(), SimpleFillTest(), SimpleFloorTest(), SimplePermuteTestImpl(), SimpleTransposeTestImpl(), Slice(), SqrtNNTest(), OpenClTimer::Start(), MemoryManager::StoreMemToAllocate(), Graph::SubstituteSubgraph(), TEST_SUITE(), TestDynamicBackendId(), TrueFunc(), UnidirectionalSequenceLstmInt8WithCifgWithPeepholeNoProjectionTest(), 
UnidirectionalSequenceLstmLayerInt8NoCifgWithPeepholeWithProjectionTest(), UnidirectionalSequenceLstmLayerInt8NoCifgWithPeepholeWithProjectionWithLayerNormTest(), UnidirectionalSequenceLstmLayerInt8Test(), UnidirectionalSequenceLstmLayerInt8TimeMajorTest(), UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTest(), UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTest(), UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTest(), ClBackend::ClBackendCustomAllocatorMemoryRegion::unmap(), ClBackend::UseCustomMemoryAllocator(), IBackendInternal::UseCustomMemoryAllocator(), MockProfilingServiceStatus::WaitForProfilingServiceActivation(), WorkingMemHandle::WorkingMemHandle(), TestProfilingConnectionBase::WritePacket(), Graph::LayerInGraph< InputLayer >::~LayerInGraph(), Graph::LayerInGraph< OutputLayer >::~LayerInGraph(), and ScopedProfilingEvent::~ScopedProfilingEvent().

14 {}
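
A typical use, sketched here rather than taken from the ArmNN sources: silencing unused-parameter warnings in a stub or overridden method without casting each argument to void individually:

#include <armnn/utility/IgnoreUnused.hpp>

void VisitLayer(int layerIndex, const char* layerName)
{
    // Parameters are intentionally unused in this stub.
    armnn::IgnoreUnused(layerIndex, layerName);
}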

◆ InitializeArmComputeClTensorData()

void armnn::InitializeArmComputeClTensorData ( arm_compute::CLTensor &  clTensor,
const ConstTensorHandle handle 
)
inline

Definition at line 115 of file ClWorkloadUtils.hpp.

References ARMNN_ASSERT.

117 {
118  ARMNN_ASSERT(handle);
119 
120  armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
121  switch(handle->GetTensorInfo().GetDataType())
122  {
123  case DataType::Float16:
124  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>());
125  break;
126  case DataType::Float32:
127  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
128  break;
129  case DataType::QAsymmU8:
130  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>());
131  break;
132  case DataType::QAsymmS8:
133  case DataType::QSymmS8:
134  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int8_t>());
135  break;
136  case DataType::QSymmS16:
137  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int16_t>());
138  break;
139  case DataType::Signed32:
140  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>());
141  break;
142  default:
143  ARMNN_ASSERT_MSG(false, "Unexpected tensor type.");
144  }
145 };
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void CopyArmComputeClTensorData(arm_compute::CLTensor &dstTensor, const T *srcData)
half_float::half Half
Definition: Half.hpp:18

◆ InitializeArmComputeTensorData()

void armnn::InitializeArmComputeTensorData ( arm_compute::Tensor &  tensor,
const ConstTensorHandle handle 
)
inline

Definition at line 60 of file NeonWorkloadUtils.hpp.

References ARMNN_ASSERT, ARMNN_ASSERT_MSG, CopyArmComputeTensorData(), Float16, Float32, ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), ConstTensorHandle::GetTensorInfo(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

62 {
63  ARMNN_ASSERT(handle);
64 
65  switch(handle->GetTensorInfo().GetDataType())
66  {
67  case DataType::Float16:
68  CopyArmComputeTensorData(tensor, handle->GetConstTensor<armnn::Half>());
69  break;
70  case DataType::Float32:
71  CopyArmComputeTensorData(tensor, handle->GetConstTensor<float>());
72  break;
73  case DataType::QAsymmU8:
74  CopyArmComputeTensorData(tensor, handle->GetConstTensor<uint8_t>());
75  break;
76  case DataType::QSymmS8:
77  case DataType::QAsymmS8:
78  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int8_t>());
79  break;
80  case DataType::Signed32:
81  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int32_t>());
82  break;
83  case DataType::QSymmS16:
84  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int16_t>());
85  break;
86  default:
87  ARMNN_ASSERT_MSG(false, "Unexpected tensor type.");
88  }
89 };
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void CopyArmComputeTensorData(arm_compute::Tensor &dstTensor, const T *srcData)
half_float::half Half
Definition: Half.hpp:18

◆ InsertConvertBf16ToFp32LayersBefore()

std::vector< ConvertBf16ToFp32Layer * > InsertConvertBf16ToFp32LayersBefore ( Graph graph,
Layer layer,
bool  expectCorrectInputType 
)

Definition at line 51 of file NetworkUtils.cpp.

References Layer::BeginInputSlots(), BFloat16, Layer::EndInputSlots(), Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment().

54 {
55  std::vector<ConvertBf16ToFp32Layer*> convertLayers;
56  convertLayers.reserve(layer.GetNumInputSlots());
57 
58  // Insert a ConvertBf16ToFp32Layer before each input slot
59  for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
60  {
61  bool allowInsert = true;
62  if (expectCorrectInputType)
63  {
64  // Only insert ConvertBf16ToFp32Layer before BF16 input slots
65  OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
66  allowInsert =
67  connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
68  }
69 
70  if (allowInsert)
71  {
72  const std::string name =
73  std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
74  layer.GetName();
75  ConvertBf16ToFp32Layer* convertLayer =
76  graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
77 
78  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
79  convertInfo.SetDataType(DataType::Float32);
80 
81  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
82 
83  convertLayers.emplace_back(convertLayer);
84  }
85  }
86 
87  return convertLayers;
88 }

◆ InsertConvertFp16ToFp32LayersBefore()

std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore ( Graph graph,
Layer layer,
bool  expectCorrectInputType 
)

Definition at line 138 of file NetworkUtils.cpp.

References Layer::BeginInputSlots(), Layer::EndInputSlots(), Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), ConvertFp32NetworkToFp16Impl::Run(), and TEST_SUITE().

141 {
142  std::vector<ConvertFp16ToFp32Layer*> convertLayers;
143  convertLayers.reserve(layer.GetNumInputSlots());
144 
145  // Insert a ConvertFp16ToFp32Layer before each input slot
146  for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
147  {
148  bool allowInsert = true;
149  if (expectCorrectInputType)
150  {
151  // Only insert ConvertFp16ToFp32Layer before FP16 input slots
152  OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
153  allowInsert =
154  connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
155  }
156 
157  if (allowInsert)
158  {
159  const std::string name =
160  std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
161  layer.GetName();
162  ConvertFp16ToFp32Layer* convertLayer =
163  graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
164 
165  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
166  convertInfo.SetDataType(DataType::Float32);
167 
168  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
169 
170  convertLayers.emplace_back(convertLayer);
171  }
172  }
173 
174  return convertLayers;
175 }

◆ InsertConvertFp32ToBf16LayersAfter()

std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersAfter ( Graph graph,
Layer layer 
)

Definition at line 177 of file NetworkUtils.cpp.

References BFloat16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment().

178 {
179  const unsigned int numOutputSlots = layer.GetNumOutputSlots();
180 
181  std::vector<ConvertFp32ToBf16Layer*> convertLayers;
182  convertLayers.reserve(numOutputSlots);
183 
184  // Update Bf16 output slots to FP32 on current layer
185  ChangeOutputBf16ToFp32(layer);
186 
187  // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
188  for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
189  {
190  OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
191  if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
192  {
193  const std::string name =
194  std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
195  ConvertFp32ToBf16Layer* convertLayer =
196  graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
197 
198  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
199  convertInfo.SetDataType(DataType::BFloat16);
200 
201  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
202 
203  convertLayers.emplace_back(convertLayer);
204  }
205  }
206 
207  return convertLayers;
208 }

◆ InsertConvertFp32ToBf16LayersBefore()

std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersBefore ( Graph graph,
Layer layer,
bool  expectCorrectInputType 
)

Definition at line 90 of file NetworkUtils.cpp.

References Layer::BeginInputSlots(), BFloat16, Convolution2d, DepthwiseConvolution2d, Layer::EndInputSlots(), Float32, FullyConnected, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Layer::GetType(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by ConvertFp32NetworkToBf16Impl::Run().

93 {
94  std::vector<ConvertFp32ToBf16Layer*> convertLayers;
95  convertLayers.reserve(layer.GetNumInputSlots());
96 
97  // Insert a ConvertFp32ToBf16Layer before each input slot
98  for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
99  {
100  bool allowInsert = true;
101 
102  if ((layer.GetType() == LayerType::Convolution2d ||
103  layer.GetType() == LayerType::FullyConnected ||
104  layer.GetType() == LayerType::DepthwiseConvolution2d)
105  && inputSlot->GetSlotIndex() == 2)
106  {
107  // Refrain from reducing bias to Bf16
108  continue;
109  }
110  if (expectCorrectInputType)
111  {
112  // Only insert ConvertFp32ToBf16Layer before FP32 input slots
113  OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
114  allowInsert =
115  connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
116  }
117 
118  if (allowInsert)
119  {
120  const std::string name =
121  std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
122  layer.GetName();
123  ConvertFp32ToBf16Layer* convertLayer =
124  graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());
125 
126  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
127  convertInfo.SetDataType(DataType::BFloat16);
128 
129  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
130 
131  convertLayers.emplace_back(convertLayer);
132  }
133  }
134 
135  return convertLayers;
136 }
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.

◆ InsertConvertFp32ToFp16LayersAfter()

std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter ( Graph graph,
Layer layer 
)

Definition at line 210 of file NetworkUtils.cpp.

References Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), ConvertFp32NetworkToFp16Impl::Run(), and TEST_SUITE().

211 {
212  const unsigned int numOutputSlots = layer.GetNumOutputSlots();
213 
214  std::vector<ConvertFp32ToFp16Layer*> convertLayers;
215  convertLayers.reserve(numOutputSlots);
216 
217  // Update FP16 output slots to FP32 on current layer
218  ChangeOutputFp16ToFp32(layer);
219 
220  // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
221  for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
222  {
223  OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
224  if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
225  {
226  const std::string name =
227  std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
228  ConvertFp32ToFp16Layer* convertLayer =
229  graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
230 
231  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
232  convertInfo.SetDataType(DataType::Float16);
233 
234  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
235 
236  convertLayers.emplace_back(convertLayer);
237  }
238  }
239 
240  return convertLayers;
241 }
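
A hedged sketch of the pattern the FP32-to-FP16 conversion pass uses with the two helpers above: a layer that must keep running in FP32 is wrapped with conversion layers on both sides. Graph, Layer and both helpers live in internal src/armnn headers, so the include is indicative:

#include "NetworkUtils.hpp" // internal header declaring the Insert* helpers

void WrapFp32OnlyLayer(armnn::Graph& graph, armnn::Layer& layer)
{
    // Inserts a ConvertFp16ToFp32Layer in front of each FP16-typed input slot
    // (expectCorrectInputType == true skips slots that are already FP32).
    armnn::InsertConvertFp16ToFp32LayersBefore(graph, layer, true);

    // Switches the layer's FP16 outputs to FP32 and appends a
    // ConvertFp32ToFp16Layer after each of them; both calls return the
    // conversion layers they created.
    armnn::InsertConvertFp32ToFp16LayersAfter(graph, layer);
}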

◆ InsertDebugLayerAfter()

std::vector< DebugLayer * > InsertDebugLayerAfter ( Graph graph,
Layer layer 
)

Definition at line 243 of file NetworkUtils.cpp.

References ARMNN_ASSERT, Layer::BeginOutputSlots(), CpuRef, Layer::EndOutputSlots(), InputSlot::GetConnectedOutputSlot(), Layer::GetInputSlot(), Layer::GetNameStr(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), Layer::SetBackendId(), and OutputSlot::SetTensorInfo().

Referenced by AddDebugImpl::Run().

244 {
245  std::vector<DebugLayer*> debugLayers;
246  debugLayers.reserve(layer.GetNumOutputSlots());
247 
248  // Connect a DebugLayer to each output slot of the layer
249  uint32_t outputSlotIdx = 0;
250  for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
251  {
252  const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
253  std::to_string(outputSlotIdx);
254 
255  DebugLayer* debugLayer =
256  graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str());
257 
258  // Sets output tensor info for the debug layer.
259  ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
260  TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
261 
262  debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);
263 
264  // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
265  debugLayer->SetBackendId(Compute::CpuRef);
266 
267  debugLayers.emplace_back(debugLayer);
268 
269  ++outputSlotIdx;
270  }
271 
272  return debugLayers;
273 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ InstanceNorm()

void InstanceNorm ( const InstanceNormalizationQueueDescriptor data,
const TensorInfo inputInfo,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 18 of file InstanceNorm.cpp.

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), InstanceNormalizationDescriptor::m_Beta, InstanceNormalizationDescriptor::m_DataLayout, InstanceNormalizationDescriptor::m_Eps, InstanceNormalizationDescriptor::m_Gamma, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

Referenced by RefInstanceNormalizationWorkload::ExecuteAsync().

22 {
23  const TensorShape inputShape = inputInfo.GetShape();
24 
25  armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
26 
27  unsigned int inputBatches = inputShape[0];
28  unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
29  unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
30  unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
31 
32  float beta = data.m_Parameters.m_Beta;
33  float eps = data.m_Parameters.m_Eps;
34  float gamma = data.m_Parameters.m_Gamma;
35 
36  for (unsigned int n = 0; n < inputBatches; ++n)
37  {
38  for (unsigned int c = 0; c < inputChannels; ++c)
39  {
40  float mean = 0, var = 0;
41 
42  //Calculate Mean
43  for (unsigned int h = 0; h < inputHeight; h++)
44  {
45  for (unsigned int w = 0; w < inputWidth; w++)
46  {
47  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
48 
49  inputDecoder[index];
50  float value = inputDecoder.Get();
51  mean += value;
52  }
53  }
54  mean /= static_cast<float>(inputHeight * inputWidth);
55 
56  //Calculate Variance
57  for (unsigned int h = 0; h < inputHeight; h++)
58  {
59  for (unsigned int w = 0; w < inputWidth; w++)
60  {
61  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
62 
63  inputDecoder[index];
64  float value = inputDecoder.Get();
65  var += (value - mean) * (value - mean);
66  }
67  }
68  var /= static_cast<float>(inputHeight * inputWidth);
69 
70  // Apply Instance Normalisation
71  for (unsigned int h = 0; h < inputHeight; ++h)
72  {
73  for (unsigned int w = 0; w < inputWidth; ++w)
74  {
75  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
76  inputDecoder[index];
77  outputEncoder[index];
78  outputEncoder.Set((inputDecoder.Get() - mean) * gamma / std::sqrt ( var + eps) + beta);
79  }
80 
81  }
82  }
83  }
84 }
virtual void Set(IType right)=0
virtual IType Get() const =0
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
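
Restating the loops above as a single formula, where the mean and variance are taken per batch n and channel c over the H x W plane, and gamma, beta and epsilon come from the descriptor's m_Gamma, m_Beta and m_Eps:

    y_{n,c,h,w} = \gamma \cdot \frac{x_{n,c,h,w} - \mu_{n,c}}{\sqrt{\sigma^2_{n,c} + \epsilon}} + \beta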

◆ IntersectionOverUnion()

float IntersectionOverUnion ( const float *  boxI,
const float *  boxJ 
)

Definition at line 30 of file DetectionPostProcess.cpp.

Referenced by NonMaxSuppression(), and TEST_SUITE().

31 {
32  // Box-corner format: ymin, xmin, ymax, xmax.
33  const int yMin = 0;
34  const int xMin = 1;
35  const int yMax = 2;
36  const int xMax = 3;
37  float areaI = (boxI[yMax] - boxI[yMin]) * (boxI[xMax] - boxI[xMin]);
38  float areaJ = (boxJ[yMax] - boxJ[yMin]) * (boxJ[xMax] - boxJ[xMin]);
39  float yMinIntersection = std::max(boxI[yMin], boxJ[yMin]);
40  float xMinIntersection = std::max(boxI[xMin], boxJ[xMin]);
41  float yMaxIntersection = std::min(boxI[yMax], boxJ[yMax]);
42  float xMaxIntersection = std::min(boxI[xMax], boxJ[xMax]);
43  float areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
44  std::max(xMaxIntersection - xMinIntersection, 0.0f);
45  float areaUnion = areaI + areaJ - areaIntersection;
46  return areaIntersection / areaUnion;
47 }
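
As a worked example in the box-corner format above: boxI = { 0, 0, 2, 2 } and boxJ = { 1, 1, 3, 3 } each have area 4, their intersection is the unit square { 1, 1, 2, 2 } with area 1, so the union is 4 + 4 - 1 = 7 and the returned IoU is 1/7, roughly 0.143.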

◆ IsActivationSupported()

bool armnn::IsActivationSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const ActivationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and ILayerSupport::~ILayerSupport().

◆ IsAdditionSupported()

bool armnn::IsAdditionSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and MockLayerSupport::IsLayerSupported().

◆ IsBatchNormalizationSupported()

bool armnn::IsBatchNormalizationSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const TensorInfo mean,
const TensorInfo var,
const TensorInfo beta,
const TensorInfo gamma,
const BatchNormalizationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsBatchToSpaceNdSupported()

bool armnn::IsBatchToSpaceNdSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const BatchToSpaceNdDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsBFloat16()

bool armnn::IsBFloat16 ( const WorkloadInfo info)

Definition at line 53 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

54 {
55  return IsDataType<DataType::BFloat16>(info);
56 }

◆ IsCapabilitySupported()

bool IsCapabilitySupported ( const armnn::BackendId backend,
armnn::BackendCapability  capability 
)

Convenience function to check a capability on a backend.

Definition at line 114 of file BackendHelper.cpp.

References ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, and BackendRegistryInstance().

115 {
116  bool hasCapability = false;
117  auto const& backendRegistry = armnn::BackendRegistryInstance();
118  if (backendRegistry.IsBackendRegistered(backend))
119  {
120  auto factoryFunc = backendRegistry.GetFactory(backend);
121  auto backendObject = factoryFunc();
123  hasCapability = backendObject->HasCapability(capability);
125  }
126  return hasCapability;
127 }
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
BackendRegistry & BackendRegistryInstance()
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34

◆ IsConcatSupported()

bool armnn::IsConcatSupported ( const BackendId backend,
const std::vector< const TensorInfo *>  inputs,
const TensorInfo output,
const OriginsDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsConstantSupported()

bool armnn::IsConstantSupported ( const BackendId backend,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsConvertFp16ToFp32Supported()

bool armnn::IsConvertFp16ToFp32Supported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsConvertFp32ToFp16Supported()

bool armnn::IsConvertFp32ToFp16Supported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsConvolution2dSupported()

bool armnn::IsConvolution2dSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const Convolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and MockLayerSupport::IsLayerSupported().

◆ IsDataType()

bool armnn::IsDataType ( const WorkloadInfo info)

Definition at line 32 of file RefWorkloadFactory.cpp.

References WorkloadInfo::m_InputTensorInfos, and WorkloadInfo::m_OutputTensorInfos.

33 {
34  auto checkType = [](const TensorInfo& tensorInfo) {return tensorInfo.GetDataType() == ArmnnType;};
35  auto it = std::find_if(std::begin(info.m_InputTensorInfos), std::end(info.m_InputTensorInfos), checkType);
36  if (it != std::end(info.m_InputTensorInfos))
37  {
38  return true;
39  }
40  it = std::find_if(std::begin(info.m_OutputTensorInfos), std::end(info.m_OutputTensorInfos), checkType);
41  if (it != std::end(info.m_OutputTensorInfos))
42  {
43  return true;
44  }
45  return false;
46 }
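
Note that IsDataType is a function template over a DataType non-type template parameter (the parameter is not visible in the flattened signature above); the IsBFloat16 and IsFloat16 helpers documented nearby are thin wrappers that simply instantiate it, for example as IsDataType<DataType::BFloat16>(info).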

◆ IsDebugSupported()

bool armnn::IsDebugSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsDepthwiseConvolutionSupported()

bool armnn::IsDepthwiseConvolutionSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const DepthwiseConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsDequantizeSupported()

bool armnn::IsDequantizeSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsDivisionSupported()

bool armnn::IsDivisionSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsEqualSupported()

bool armnn::IsEqualSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

◆ IsFakeQuantizationSupported()

bool armnn::IsFakeQuantizationSupported ( const BackendId backend,
const TensorInfo input,
const FakeQuantizationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsFloat16()

bool armnn::IsFloat16 ( const WorkloadInfo info)

Definition at line 58 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

59 {
60  return IsDataType<DataType::Float16>(info);
61 }

◆ IsFloorSupported()

bool armnn::IsFloorSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsFullyConnectedSupported()

bool armnn::IsFullyConnectedSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const TensorInfo weights,
const TensorInfo biases,
const FullyConnectedDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsGreaterSupported()

bool armnn::IsGreaterSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

◆ IsInputSupported()

bool armnn::IsInputSupported ( const BackendId backend,
const TensorInfo input,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and MockLayerSupport::IsLayerSupported().

◆ IsL2NormalizationSupported()

bool armnn::IsL2NormalizationSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const L2NormalizationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsLayerOptimizable() [1/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer *  layer)

Definition at line 85 of file MockBackend.cpp.

References ARMNN_ASSERT, and Layer::GetName().

Referenced by IsLayerOptimizable(), and MockBackend::OptimizeSubgraphView().

86 {
87  ARMNN_ASSERT(layer != nullptr);
88 
89  // A Layer is not optimizable if its name contains "unoptimizable"
90  const std::string layerName(layer->GetName());
91  bool optimizable = layerName.find("unoptimizable") == std::string::npos;
92 
93  return optimizable;
94 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:317

◆ IsLayerOptimizable() [2/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer &  layer)

Definition at line 96 of file MockBackend.cpp.

References IsLayerOptimizable().

97 {
98  return IsLayerOptimizable(&layer);
99 }
bool IsLayerOptimizable(const armnn::Layer &layer)
Definition: MockBackend.cpp:96

◆ IsLayerSupported() [1/2]

bool armnn::IsLayerSupported ( const armnn::Layer *  layer)

Definition at line 60 of file MockBackend.cpp.

References Addition, ARMNN_ASSERT, Constant, Convolution2d, Layer::GetType(), Input, and Output.

Referenced by SampleDynamicWorkloadFactory::IsLayerSupported().

61 {
62  ARMNN_ASSERT(layer != nullptr);
63 
64  armnn::LayerType layerType = layer->GetType();
65  switch (layerType)
66  {
67  case armnn::LayerType::Input:
68  case armnn::LayerType::Output:
69  case armnn::LayerType::Addition:
70  case armnn::LayerType::Constant:
71  case armnn::LayerType::Convolution2d:
72  // Layer supported
73  return true;
74  default:
75  // Layer unsupported
76  return false;
77  }
78 }
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:271
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ IsLayerSupported() [2/2]

bool armnn::IsLayerSupported ( const armnn::Layer &  layer)

Definition at line 80 of file MockBackend.cpp.

References IWorkloadFactory::IsLayerSupported().

81 {
82  return IsLayerSupported(&layer);
83 }
bool IsLayerSupported(const armnn::Layer &layer)
Definition: MockBackend.cpp:80

◆ IsLstmSupported()

bool armnn::IsLstmSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo scratchBuffer,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const LstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMaximumSupported()

bool armnn::IsMaximumSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnSupported = nullptr,
size_t  reasonIfUnSupportedMaxLength = 0 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMeanSupported()

bool armnn::IsMeanSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const MeanDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMemCopySupported()

bool armnn::IsMemCopySupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMergeSupported()

bool armnn::IsMergeSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMinimumSupported()

bool armnn::IsMinimumSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMultiplicationSupported()

bool armnn::IsMultiplicationSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsNormalizationSupported()

bool armnn::IsNormalizationSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const NormalizationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsOperationQueueDescriptor() [1/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const QueueDescriptorType &  )

Definition at line 18 of file RefWorkloadFactory.hpp.

18 { return true; }

◆ IsOperationQueueDescriptor() [2/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const MemCopyQueueDescriptor &  )

Definition at line 21 of file RefWorkloadFactory.hpp.

21 { return false; }

◆ IsOperationQueueDescriptor() [3/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const ConstantQueueDescriptor &  )

Definition at line 24 of file RefWorkloadFactory.hpp.

24 { return false; }

◆ IsOperationQueueDescriptor() [4/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const PermuteQueueDescriptor &  )

Definition at line 27 of file RefWorkloadFactory.hpp.

27 { return false; }

◆ IsOutputSupported()

bool armnn::IsOutputSupported ( const BackendId backend,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and MockLayerSupport::IsLayerSupported().

◆ IsPadSupported()

bool armnn::IsPadSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const PadDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsPermuteSupported()

bool armnn::IsPermuteSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const PermuteDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsPooling2dSupported()

bool armnn::IsPooling2dSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const Pooling2dDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsPreCompiledSupported()

bool armnn::IsPreCompiledSupported ( const BackendId backend,
const TensorInfo input,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsPreluSupported()

bool armnn::IsPreluSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo alpha,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsQAsymmS8()

bool armnn::IsQAsymmS8 ( const WorkloadInfo info)

Definition at line 73 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

74 {
75  return IsDataType<DataType::QAsymmS8>(info);
76 }

◆ IsQAsymmU8()

bool armnn::IsQAsymmU8 ( const WorkloadInfo info)

Definition at line 78 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

79 {
80  return IsDataType<DataType::QAsymmU8>(info);
81 }

◆ IsQSymmS16()

bool armnn::IsQSymmS16 ( const WorkloadInfo info)

Definition at line 63 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

64 {
65  return IsDataType<DataType::QSymmS16>(info);
66 }

◆ IsQSymmS8()

bool armnn::IsQSymmS8 ( const WorkloadInfo info)

Definition at line 68 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

69 {
70  return IsDataType<DataType::QSymmS8>(info);
71 }

◆ IsQuantized8BitType()

constexpr bool armnn::IsQuantized8BitType ( DataType  dataType)

Definition at line 285 of file TypesUtils.hpp.

References QAsymmS8, QAsymmU8, and QSymmS8.

Referenced by GetBiasDataType(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), IsQuantizedType(), and RefLayerSupport::IsTransposeConvolution2dSupported().

286 {
287  return dataType == DataType::QAsymmU8 ||
288  dataType == DataType::QAsymmS8 ||
289  dataType == DataType::QSymmS8;
290 }
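
Because the function is constexpr, the classification can be checked at compile time; for example:

static_assert(armnn::IsQuantized8BitType(armnn::DataType::QAsymmU8), "8-bit quantized type");
static_assert(!armnn::IsQuantized8BitType(armnn::DataType::QSymmS16), "16-bit, not 8-bit");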

◆ IsQuantizedLstmSupported()

bool armnn::IsQuantizedLstmSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo previousCellStateIn,
const TensorInfo previousOutputIn,
const TensorInfo cellStateOut,
const TensorInfo output,
const QuantizedLstmInputParamsInfo paramsInfo,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsQuantizedType() [1/2]

constexpr bool armnn::IsQuantizedType ( )

◆ IsQuantizedType() [2/2]

constexpr bool armnn::IsQuantizedType ( DataType  dataType)

Definition at line 292 of file TypesUtils.hpp.

References IsQuantized8BitType(), and QSymmS16.

293 {
294  return dataType == DataType::QSymmS16 || IsQuantized8BitType(dataType);
295 }
constexpr bool IsQuantized8BitType(DataType dataType)
Definition: TypesUtils.hpp:285
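
QSymmS16 is the only addition over the 8-bit check above; for example:

static_assert(armnn::IsQuantizedType(armnn::DataType::QSymmS16), "quantized type");
static_assert(!armnn::IsQuantizedType(armnn::DataType::Float32), "not a quantized type");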

◆ IsReadyForSplitAssignment()

bool armnn::IsReadyForSplitAssignment ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo 
)

Definition at line 374 of file SubgraphViewSelector.cpp.

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

375 {
376  bool ready = true;
377  ForEachLayerInput(layerInfos, layerInfo,
378  [&ready](LayerSelectionInfo& parentInfo)
379  {
380  if (!parentInfo.m_IsProcessed)
381  {
382  ready = false;
383  }
384  });
385  return ready;
386 }
void ForEachLayerInput(LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)

◆ IsReduceSupported()

bool armnn::IsReduceSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const ReduceDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsReshapeSupported()

bool armnn::IsReshapeSupported ( const BackendId backend,
const TensorInfo input,
const ReshapeDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsResizeSupported()

bool armnn::IsResizeSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const ResizeDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsRsqrtSupported()

bool armnn::IsRsqrtSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

◆ IsSigned32()

bool armnn::IsSigned32 ( const WorkloadInfo info)

Definition at line 48 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

49 {
50  return IsDataType<DataType::Signed32>(info);
51 }

◆ IsSoftmaxSupported()

bool armnn::IsSoftmaxSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const SoftmaxDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSpaceToBatchNdSupported()

bool armnn::IsSpaceToBatchNdSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const SpaceToBatchNdDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSpaceToDepthSupported()

bool armnn::IsSpaceToDepthSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const SpaceToDepthDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSplitterSupported()

bool armnn::IsSplitterSupported ( const BackendId backend,
const TensorInfo input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
const ViewsDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsStackSupported()

bool armnn::IsStackSupported ( const BackendId backend,
const std::vector< const TensorInfo *>  inputs,
const TensorInfo output,
const StackDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsStridedSliceSupported()

bool armnn::IsStridedSliceSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const StridedSliceDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSubtractionSupported()

bool armnn::IsSubtractionSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSupportedForDataTypeGeneric()

bool armnn::IsSupportedForDataTypeGeneric ( Optional< std::string &>  reasonIfUnsupported,
DataType  dataType,
Float16Func  float16FuncPtr,
Float32Func  float32FuncPtr,
Uint8Func  uint8FuncPtr,
Int32Func  int32FuncPtr,
BooleanFunc  booleanFuncPtr,
Params &&...  params 
)

Definition at line 27 of file LayerSupportCommon.hpp.

References Boolean, Float16, Float32, QAsymmU8, and Signed32.

Referenced by RefLayerSupport::IsConvertFp16ToFp32Supported(), RefLayerSupport::IsConvertFp32ToFp16Supported(), and NeonLayerSupport::IsFloorSupported().

35 {
36  switch(dataType)
37  {
38  case DataType::Float16:
39  return float16FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
40  case DataType::Float32:
41  return float32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
42  case DataType::QAsymmU8:
43  return uint8FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
44  case DataType::Signed32:
45  return int32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
46  case DataType::Boolean:
47  return booleanFuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
48  default:
49  return false;
50  }
51 }
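
A hedged sketch of how a layer-support check might dispatch through this helper, using the TrueFunc/FalseFunc helpers declared in the same header; the per-type answers chosen here are illustrative and not taken from any real backend, and reasonIfUnsupported and input are assumed to come from the enclosing IsXxxSupported implementation.

// Illustrative dispatch: accept float and QAsymmU8 tensors, reject the rest.
bool supported = armnn::IsSupportedForDataTypeGeneric(reasonIfUnsupported,
                                                      input.GetDataType(),
                                                      &armnn::TrueFunc<>,   // Float16
                                                      &armnn::TrueFunc<>,   // Float32
                                                      &armnn::TrueFunc<>,   // QAsymmU8
                                                      &armnn::FalseFunc<>,  // Signed32
                                                      &armnn::FalseFunc<>); // Boolean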

◆ IsSwitchSupported()

bool armnn::IsSwitchSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output0,
const TensorInfo output1,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsTransposeConvolution2dSupported()

bool armnn::IsTransposeConvolution2dSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const TransposeConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ LayerEnumOf() [1/74]

constexpr LayerType armnn::LayerEnumOf ( const T *  = nullptr)

◆ LayerEnumOf() [2/74]

constexpr LayerType armnn::LayerEnumOf ( const ActivationLayer )

Definition at line 110 of file LayersFwd.hpp.

◆ LayerEnumOf() [3/74]

constexpr LayerType armnn::LayerEnumOf ( const AdditionLayer )

Definition at line 111 of file LayersFwd.hpp.

◆ LayerEnumOf() [4/74]

constexpr LayerType armnn::LayerEnumOf ( const ArgMinMaxLayer )

Definition at line 112 of file LayersFwd.hpp.

◆ LayerEnumOf() [5/74]

constexpr LayerType armnn::LayerEnumOf ( const BatchNormalizationLayer )

Definition at line 113 of file LayersFwd.hpp.

◆ LayerEnumOf() [6/74]

constexpr LayerType armnn::LayerEnumOf ( const BatchToSpaceNdLayer )

Definition at line 114 of file LayersFwd.hpp.

◆ LayerEnumOf() [7/74]

constexpr LayerType armnn::LayerEnumOf ( const CastLayer )

Definition at line 115 of file LayersFwd.hpp.

◆ LayerEnumOf() [8/74]

constexpr LayerType armnn::LayerEnumOf ( const ChannelShuffleLayer )

Definition at line 116 of file LayersFwd.hpp.

◆ LayerEnumOf() [9/74]

constexpr LayerType armnn::LayerEnumOf ( const ComparisonLayer )

Definition at line 117 of file LayersFwd.hpp.

◆ LayerEnumOf() [10/74]

constexpr LayerType armnn::LayerEnumOf ( const ConcatLayer )

Definition at line 118 of file LayersFwd.hpp.

◆ LayerEnumOf() [11/74]

constexpr LayerType armnn::LayerEnumOf ( const ConstantLayer )

Definition at line 119 of file LayersFwd.hpp.

◆ LayerEnumOf() [12/74]

constexpr LayerType armnn::LayerEnumOf ( const ConvertBf16ToFp32Layer )

Definition at line 120 of file LayersFwd.hpp.

◆ LayerEnumOf() [13/74]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp16ToFp32Layer )

Definition at line 121 of file LayersFwd.hpp.

◆ LayerEnumOf() [14/74]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp32ToBf16Layer )

Definition at line 122 of file LayersFwd.hpp.

◆ LayerEnumOf() [15/74]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp32ToFp16Layer )

Definition at line 123 of file LayersFwd.hpp.

◆ LayerEnumOf() [16/74]

constexpr LayerType armnn::LayerEnumOf ( const Convolution2dLayer )

Definition at line 124 of file LayersFwd.hpp.

◆ LayerEnumOf() [17/74]

constexpr LayerType armnn::LayerEnumOf ( const Convolution3dLayer )

Definition at line 125 of file LayersFwd.hpp.

◆ LayerEnumOf() [18/74]

constexpr LayerType armnn::LayerEnumOf ( const DebugLayer )

Definition at line 126 of file LayersFwd.hpp.

◆ LayerEnumOf() [19/74]

constexpr LayerType armnn::LayerEnumOf ( const DepthToSpaceLayer )

Definition at line 127 of file LayersFwd.hpp.

◆ LayerEnumOf() [20/74]

constexpr LayerType armnn::LayerEnumOf ( const DepthwiseConvolution2dLayer )

Definition at line 128 of file LayersFwd.hpp.

◆ LayerEnumOf() [21/74]

constexpr LayerType armnn::LayerEnumOf ( const DequantizeLayer )

Definition at line 129 of file LayersFwd.hpp.

◆ LayerEnumOf() [22/74]

constexpr LayerType armnn::LayerEnumOf ( const DetectionPostProcessLayer )

Definition at line 130 of file LayersFwd.hpp.

◆ LayerEnumOf() [23/74]

constexpr LayerType armnn::LayerEnumOf ( const DivisionLayer )

Definition at line 131 of file LayersFwd.hpp.

◆ LayerEnumOf() [24/74]

constexpr LayerType armnn::LayerEnumOf ( const ElementwiseUnaryLayer )

Definition at line 132 of file LayersFwd.hpp.

◆ LayerEnumOf() [25/74]

constexpr LayerType armnn::LayerEnumOf ( const FakeQuantizationLayer )

Definition at line 133 of file LayersFwd.hpp.

◆ LayerEnumOf() [26/74]

constexpr LayerType armnn::LayerEnumOf ( const FillLayer )

Definition at line 134 of file LayersFwd.hpp.

◆ LayerEnumOf() [27/74]

constexpr LayerType armnn::LayerEnumOf ( const FloorLayer )

Definition at line 135 of file LayersFwd.hpp.

◆ LayerEnumOf() [28/74]

constexpr LayerType armnn::LayerEnumOf ( const FullyConnectedLayer )

Definition at line 136 of file LayersFwd.hpp.

◆ LayerEnumOf() [29/74]

constexpr LayerType armnn::LayerEnumOf ( const GatherLayer )

Definition at line 137 of file LayersFwd.hpp.

◆ LayerEnumOf() [30/74]

constexpr LayerType armnn::LayerEnumOf ( const GatherNdLayer )

Definition at line 138 of file LayersFwd.hpp.

◆ LayerEnumOf() [31/74]

constexpr LayerType armnn::LayerEnumOf ( const InputLayer )

Definition at line 139 of file LayersFwd.hpp.

◆ LayerEnumOf() [32/74]

constexpr LayerType armnn::LayerEnumOf ( const InstanceNormalizationLayer )

Definition at line 140 of file LayersFwd.hpp.

◆ LayerEnumOf() [33/74]

constexpr LayerType armnn::LayerEnumOf ( const L2NormalizationLayer )

Definition at line 141 of file LayersFwd.hpp.

◆ LayerEnumOf() [34/74]

constexpr LayerType armnn::LayerEnumOf ( const LogicalBinaryLayer )

Definition at line 142 of file LayersFwd.hpp.

◆ LayerEnumOf() [35/74]

constexpr LayerType armnn::LayerEnumOf ( const LogSoftmaxLayer )

Definition at line 143 of file LayersFwd.hpp.

◆ LayerEnumOf() [36/74]

constexpr LayerType armnn::LayerEnumOf ( const LstmLayer )

Definition at line 144 of file LayersFwd.hpp.

◆ LayerEnumOf() [37/74]

constexpr LayerType armnn::LayerEnumOf ( const MapLayer )

Definition at line 145 of file LayersFwd.hpp.

◆ LayerEnumOf() [38/74]

constexpr LayerType armnn::LayerEnumOf ( const MaximumLayer )

Definition at line 146 of file LayersFwd.hpp.

◆ LayerEnumOf() [39/74]

constexpr LayerType armnn::LayerEnumOf ( const MeanLayer )

Definition at line 147 of file LayersFwd.hpp.

◆ LayerEnumOf() [40/74]

constexpr LayerType armnn::LayerEnumOf ( const MemCopyLayer )

Definition at line 148 of file LayersFwd.hpp.

◆ LayerEnumOf() [41/74]

constexpr LayerType armnn::LayerEnumOf ( const MemImportLayer )

Definition at line 149 of file LayersFwd.hpp.

◆ LayerEnumOf() [42/74]

constexpr LayerType armnn::LayerEnumOf ( const MergeLayer )

Definition at line 150 of file LayersFwd.hpp.

◆ LayerEnumOf() [43/74]

constexpr LayerType armnn::LayerEnumOf ( const MinimumLayer )

Definition at line 151 of file LayersFwd.hpp.

◆ LayerEnumOf() [44/74]

constexpr LayerType armnn::LayerEnumOf ( const MultiplicationLayer )

Definition at line 152 of file LayersFwd.hpp.

◆ LayerEnumOf() [45/74]

constexpr LayerType armnn::LayerEnumOf ( const NormalizationLayer )

Definition at line 153 of file LayersFwd.hpp.

◆ LayerEnumOf() [46/74]

constexpr LayerType armnn::LayerEnumOf ( const OutputLayer )

Definition at line 154 of file LayersFwd.hpp.

◆ LayerEnumOf() [47/74]

constexpr LayerType armnn::LayerEnumOf ( const PadLayer )

Definition at line 155 of file LayersFwd.hpp.

◆ LayerEnumOf() [48/74]

constexpr LayerType armnn::LayerEnumOf ( const PermuteLayer )

Definition at line 156 of file LayersFwd.hpp.

◆ LayerEnumOf() [49/74]

constexpr LayerType armnn::LayerEnumOf ( const Pooling2dLayer )

Definition at line 157 of file LayersFwd.hpp.

◆ LayerEnumOf() [50/74]

constexpr LayerType armnn::LayerEnumOf ( const Pooling3dLayer )

Definition at line 158 of file LayersFwd.hpp.

◆ LayerEnumOf() [51/74]

constexpr LayerType armnn::LayerEnumOf ( const PreCompiledLayer )

Definition at line 159 of file LayersFwd.hpp.

◆ LayerEnumOf() [52/74]

constexpr LayerType armnn::LayerEnumOf ( const PreluLayer )

Definition at line 160 of file LayersFwd.hpp.

◆ LayerEnumOf() [53/74]

constexpr LayerType armnn::LayerEnumOf ( const QuantizeLayer )

Definition at line 161 of file LayersFwd.hpp.

◆ LayerEnumOf() [54/74]

constexpr LayerType armnn::LayerEnumOf ( const QLstmLayer )

Definition at line 162 of file LayersFwd.hpp.

◆ LayerEnumOf() [55/74]

constexpr LayerType armnn::LayerEnumOf ( const QuantizedLstmLayer )

Definition at line 163 of file LayersFwd.hpp.

◆ LayerEnumOf() [56/74]

constexpr LayerType armnn::LayerEnumOf ( const RankLayer )

Definition at line 164 of file LayersFwd.hpp.

◆ LayerEnumOf() [57/74]

constexpr LayerType armnn::LayerEnumOf ( const ReduceLayer )

Definition at line 165 of file LayersFwd.hpp.

◆ LayerEnumOf() [58/74]

constexpr LayerType armnn::LayerEnumOf ( const ReshapeLayer )

Definition at line 166 of file LayersFwd.hpp.

◆ LayerEnumOf() [59/74]

constexpr LayerType armnn::LayerEnumOf ( const ResizeLayer )

Definition at line 167 of file LayersFwd.hpp.

◆ LayerEnumOf() [60/74]

constexpr LayerType armnn::LayerEnumOf ( const ShapeLayer )

Definition at line 168 of file LayersFwd.hpp.

◆ LayerEnumOf() [61/74]

constexpr LayerType armnn::LayerEnumOf ( const SliceLayer )

Definition at line 169 of file LayersFwd.hpp.

◆ LayerEnumOf() [62/74]

constexpr LayerType armnn::LayerEnumOf ( const SoftmaxLayer )

Definition at line 170 of file LayersFwd.hpp.

◆ LayerEnumOf() [63/74]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToBatchNdLayer )

Definition at line 171 of file LayersFwd.hpp.

◆ LayerEnumOf() [64/74]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToDepthLayer )

Definition at line 172 of file LayersFwd.hpp.

◆ LayerEnumOf() [65/74]

constexpr LayerType armnn::LayerEnumOf ( const SplitterLayer )

Definition at line 173 of file LayersFwd.hpp.

◆ LayerEnumOf() [66/74]

constexpr LayerType armnn::LayerEnumOf ( const StackLayer )

Definition at line 174 of file LayersFwd.hpp.

◆ LayerEnumOf() [67/74]

constexpr LayerType armnn::LayerEnumOf ( const StandInLayer )

Definition at line 175 of file LayersFwd.hpp.

◆ LayerEnumOf() [68/74]

constexpr LayerType armnn::LayerEnumOf ( const StridedSliceLayer )

Definition at line 176 of file LayersFwd.hpp.

◆ LayerEnumOf() [69/74]

constexpr LayerType armnn::LayerEnumOf ( const SubtractionLayer )

Definition at line 177 of file LayersFwd.hpp.

◆ LayerEnumOf() [70/74]

constexpr LayerType armnn::LayerEnumOf ( const SwitchLayer )

Definition at line 178 of file LayersFwd.hpp.

◆ LayerEnumOf() [71/74]

constexpr LayerType armnn::LayerEnumOf ( const TransposeLayer )

Definition at line 179 of file LayersFwd.hpp.

◆ LayerEnumOf() [72/74]

constexpr LayerType armnn::LayerEnumOf ( const TransposeConvolution2dLayer )

Definition at line 180 of file LayersFwd.hpp.

◆ LayerEnumOf() [73/74]

constexpr LayerType armnn::LayerEnumOf ( const UnidirectionalSequenceLstmLayer )

Definition at line 181 of file LayersFwd.hpp.

◆ LayerEnumOf() [74/74]

constexpr LayerType armnn::LayerEnumOf ( const UnmapLayer )

Definition at line 182 of file LayersFwd.hpp.

◆ LevelToString()

std::string armnn::LevelToString ( LogSeverity  level)
inline

Definition at line 15 of file Logging.hpp.

References Debug, Error, Fatal, Info, Trace, and Warning.

Referenced by ScopedRecord::ScopedRecord().

16 {
17  switch(level)
18  {
19  case LogSeverity::Trace:
20  return "Trace";
21  case LogSeverity::Debug:
22  return "Debug";
23  case LogSeverity::Info:
24  return "Info";
25  case LogSeverity::Warning:
26  return "Warning";
27  case LogSeverity::Error:
28  return "Error";
29  case LogSeverity::Fatal:
30  return "Fatal";
31  default:
32  return "Log";
33  }
34 }
void Debug(const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
Definition: Debug.cpp:19
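
A one-line usage example follows directly from the switch above:

std::string label = armnn::LevelToString(armnn::LogSeverity::Warning); // "Warning"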

◆ LogSoftmax()

void LogSoftmax ( Decoder< float > &  input,
Encoder< float > &  output,
const TensorInfo inputInfo,
const LogSoftmaxDescriptor descriptor 
)

Definition at line 29 of file LogSoftmax.cpp.

References ARMNN_ASSERT_MSG, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), IgnoreUnused(), SoftmaxDescriptor::m_Axis, SoftmaxDescriptor::m_Beta, numeric_cast(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

33 {
34  const unsigned int numDimensions = inputInfo.GetNumDimensions();
35 
36  bool axisIsValid = ValidateAxis(descriptor.m_Axis, numDimensions);
37  ARMNN_ASSERT_MSG(axisIsValid,
38  "Axis index is not in range [-numDimensions, numDimensions).");
39  IgnoreUnused(axisIsValid);
40 
41  unsigned int uAxis = descriptor.m_Axis < 0 ?
42  numDimensions - armnn::numeric_cast<unsigned int>(std::abs(descriptor.m_Axis)) :
43  armnn::numeric_cast<unsigned int>(descriptor.m_Axis);
44 
45  const TensorShape& inputShape = inputInfo.GetShape();
46  const unsigned int outerSize = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
47  const unsigned int axisSize = inputShape[uAxis];
48  const unsigned int innerSize = armnnUtils::GetNumElementsBetween(inputShape,
49  uAxis + 1,
50  inputShape.GetNumDimensions());
51 
52  for (unsigned int outer = 0; outer < outerSize; ++outer)
53  {
54  for (unsigned int inner = 0; inner < innerSize; ++inner)
55  {
56  // Find max
57  input[outer * axisSize * innerSize + inner];
58  float maxValue = input.Get();
59  for (unsigned int i = 1u; i < axisSize; ++i)
60  {
61  input[(outer * axisSize + i) * innerSize + inner];
62  maxValue = std::max(maxValue, input.Get());
63  }
64 
65  // Compute sum
66  float sum = 0.0f;
67  for (unsigned int i = 0u; i < axisSize; ++i)
68  {
69  input[(outer * axisSize + i) * innerSize + inner];
70  sum += std::exp((input.Get() - maxValue) * descriptor.m_Beta);
71  }
72 
73  // Compute log sum
74  const float logSum = std::log(sum);
75 
76  // Compute result
77  for (unsigned int i = 0u; i < axisSize; ++i)
78  {
79  const unsigned int index = (outer * axisSize + i) * innerSize + inner;
80 
81  input [index];
82  output[index];
83 
84  output.Set((input.Get() - maxValue) * descriptor.m_Beta - logSum);
85  }
86  }
87  }
88 }
unsigned int GetNumElementsBetween(const armnn::TensorShape &shape, unsigned int firstAxisInclusive, unsigned int lastAxisExclusive)
virtual void Set(IType right)=0
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
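
In mathematical terms, with m = \max_j x_j taken along the selected axis and \beta = descriptor.m_Beta, the loops above compute for each element x_i of that slice:

y_i = \beta (x_i - m) - \log \sum_j \exp(\beta (x_j - m))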

◆ LowerString()

std::string armnn::LowerString ( std::string  value)

Definition at line 62 of file ClBackendContext.cpp.

63 {
64  std::transform(value.begin(), value.end(), value.begin(),
65  [](unsigned char c){ return std::tolower(c); });
66 
67  return value;
68 }
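
A one-line example (the option name is illustrative):

std::string key = armnn::LowerString("FastMathEnabled"); // "fastmathenabled"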

◆ LstmImpl()

void LstmImpl ( const LstmDescriptor descriptor,
const TensorInfo inputInfo,
const TensorInfo outputInfo,
const TensorShape inputToOutputWeightsShape,
const TensorShape recurrentToOutputWeightsShape,
std::unique_ptr< Decoder< float >> &  inputData,
std::unique_ptr< Decoder< float >> &  outputStateIn,
std::unique_ptr< Decoder< float >> &  cellStateIn,
std::unique_ptr< Encoder< float >> &  outputStateOut,
std::unique_ptr< Encoder< float >> &  cellStateOut,
std::unique_ptr< Encoder< float >> &  output,
std::unique_ptr< Decoder< float >> &  cellStateOutDecoder,
std::unique_ptr< Decoder< float >> &  outputDecoder,
std::unique_ptr< Decoder< float >> &  inputToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToCellWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToCellWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputGateBiasTensor,
std::unique_ptr< Decoder< float >> &  forgetGateBiasTensor,
std::unique_ptr< Decoder< float >> &  cellBiasTensor,
std::unique_ptr< Decoder< float >> &  outputGateBiasTensor,
std::unique_ptr< Decoder< float >> &  projectionWeightsTensor,
std::unique_ptr< Decoder< float >> &  projectionBiasTensor,
std::unique_ptr< Decoder< float >> &  inputLayerNormWeights,
std::unique_ptr< Decoder< float >> &  forgetLayerNormWeights,
std::unique_ptr< Decoder< float >> &  cellLayerNormWeights,
std::unique_ptr< Decoder< float >> &  outputLayerNormWeights,
std::unique_ptr< Encoder< float >> &  inputGateScratch,
std::unique_ptr< Encoder< float >> &  cellScratch,
std::unique_ptr< Encoder< float >> &  forgetGateScratch,
std::unique_ptr< Encoder< float >> &  outputGateScratch,
std::unique_ptr< Decoder< float >> &  inputGateScratchDecoder,
std::unique_ptr< Decoder< float >> &  cellScratchDecoder,
std::unique_ptr< Decoder< float >> &  forgetGateScratchDecoder,
std::unique_ptr< Decoder< float >> &  outputGateScratchDecoder,
float  layerNormEpsilon 
)

Definition at line 13 of file Lstm.cpp.

References Activation(), ClipVector(), CopyVector(), TensorInfo::GetDataType(), TensorInfo::GetShape(), LstmDescriptor::m_ActivationFunc, LstmDescriptor::m_CifgEnabled, LstmDescriptor::m_ClippingThresCell, LstmDescriptor::m_ClippingThresProj, LstmDescriptor::m_LayerNormEnabled, LstmDescriptor::m_PeepholeEnabled, LstmDescriptor::m_ProjectionEnabled, MatrixBatchVectorMultiplyAccumulate(), MeanStddevNormalization(), SetActivationParameters(), Sigmoid, Sub1Vector(), VectorBatchVectorAdd(), VectorBatchVectorAssign(), VectorBatchVectorCwiseProduct(), VectorBatchVectorCwiseProductAccumulate(), VectorVectorCwiseProduct(), VectorVectorCwiseProductAccumulate(), and ZeroVector().

Referenced by RefLstmWorkload::ExecuteAsync(), and RefUnidirectionalSequenceLstmWorkload::ExecuteAsync().

56 {
57  // This is a porting of the LSTM::Eval() method in the Android code base
58  // Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp
59 
60  const TensorShape& inputShape = inputInfo.GetShape();
61  const DataType& outputType = outputInfo.GetDataType();
62 
63  const uint32_t nBatch = inputShape[0];
64  const uint32_t nInput = inputShape[1];
65 
66  const uint32_t nCell = inputToOutputWeightsShape[0];
67  const uint32_t nOutput = recurrentToOutputWeightsShape[1];
68 
69  const bool useCifg = descriptor.m_CifgEnabled;
70  const bool usePeephole = descriptor.m_PeepholeEnabled;
71  const bool useLayerNorm = descriptor.m_LayerNormEnabled;
72 
73  if (!useLayerNorm)
74  {
75  // Initialize scratch buffers with bias.
76  if (!useCifg)
77  {
78  VectorBatchVectorAssign(*inputGateBiasTensor,
79  nCell, nBatch, *inputGateScratch);
80  }
81  VectorBatchVectorAssign(*forgetGateBiasTensor,
82  nCell, nBatch, *forgetGateScratch);
83  VectorBatchVectorAssign(*cellBiasTensor,
84  nCell, nBatch, *cellScratch);
85  VectorBatchVectorAssign(*outputGateBiasTensor,
86  nCell, nBatch, *outputGateScratch);
87  }
88  else
89  {
90  // Initialize scratch buffers with zeroes.
91  if (!useCifg)
92  {
93  ZeroVector(*inputGateScratch, nCell * nBatch);
94  }
95  ZeroVector(*forgetGateScratch, nCell * nBatch);
96  ZeroVector(*cellScratch , nCell * nBatch);
97  ZeroVector(*outputGateScratch, nCell * nBatch);
98  }
99 
100  // For each batch and cell: compute input_weight * input.
101  if (!useCifg)
102  {
103  MatrixBatchVectorMultiplyAccumulate(*inputToInputWeightsTensor,
104  nCell, nInput, *inputData, nBatch, *inputGateScratch);
105  }
106  MatrixBatchVectorMultiplyAccumulate(*inputToForgetWeightsTensor,
107  nCell, nInput, *inputData, nBatch, *forgetGateScratch);
108  MatrixBatchVectorMultiplyAccumulate(*inputToCellWeightsTensor,
109  nCell, nInput, *inputData, nBatch, *cellScratch);
110  MatrixBatchVectorMultiplyAccumulate(*inputToOutputWeightsTensor,
111  nCell, nInput, *inputData, nBatch, *outputGateScratch);
112 
113  // For each batch and cell: compute recurrent_weight * output_state.
114  if (!useCifg)
115  {
116  MatrixBatchVectorMultiplyAccumulate(*recurrentToInputWeightsTensor,
117  nCell, nOutput, *outputStateIn, nBatch, *inputGateScratch);
118  }
119  MatrixBatchVectorMultiplyAccumulate(*recurrentToForgetWeightsTensor,
120  nCell, nOutput, *outputStateIn, nBatch, *forgetGateScratch);
121  MatrixBatchVectorMultiplyAccumulate(*recurrentToCellWeightsTensor,
122  nCell, nOutput, *outputStateIn, nBatch, *cellScratch);
123  MatrixBatchVectorMultiplyAccumulate(*recurrentToOutputWeightsTensor,
124  nCell, nOutput, *outputStateIn, nBatch, *outputGateScratch);
125 
126  // For each batch and cell: update input gate.
127  if (!useCifg)
128  {
129  if (usePeephole)
130  {
131  VectorBatchVectorCwiseProductAccumulate(*cellToInputWeightsTensor,
132  nCell, *cellStateIn, nBatch, *inputGateScratch);
133  }
134  if (useLayerNorm)
135  {
136  MeanStddevNormalization(*inputGateScratchDecoder,
137  *inputGateScratch, nCell, nBatch, layerNormEpsilon);
138  VectorBatchVectorCwiseProduct(*inputLayerNormWeights,
139  nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
140  VectorBatchVectorAdd(*inputGateBiasTensor,
141  nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
142  }
143  Activation(*inputGateScratchDecoder, *inputGateScratch,
144  TensorInfo({nCell, nBatch}, outputType),
145  ActivationFunction::Sigmoid, 0, 0);
146  }
147 
148  // For each batch and cell: update forget gate.
149  if (usePeephole)
150  {
151  VectorBatchVectorCwiseProductAccumulate(*cellToForgetWeightsTensor, nCell,
152  *cellStateIn, nBatch, *forgetGateScratch);
153  }
154  if (useLayerNorm)
155  {
156  MeanStddevNormalization(*forgetGateScratchDecoder,
157  *forgetGateScratch, nCell, nBatch, layerNormEpsilon);
158  VectorBatchVectorCwiseProduct(*forgetLayerNormWeights,
159  nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
160  VectorBatchVectorAdd(*forgetGateBiasTensor,
161  nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
162  }
163  Activation(*forgetGateScratchDecoder, *forgetGateScratch,
164  TensorInfo({nCell, nBatch}, outputType),
165  ActivationFunction::Sigmoid, 0, 0);
166 
167  // For each batch and cell: update the cell.
168  if (useLayerNorm)
169  {
170  MeanStddevNormalization(*cellScratchDecoder,
171  *cellScratch, nCell, nBatch, layerNormEpsilon);
172  VectorBatchVectorCwiseProduct(*cellLayerNormWeights,
173  nCell, *cellScratchDecoder, nBatch, *cellScratch);
174  VectorBatchVectorAdd(*cellBiasTensor,
175  nCell, *cellScratchDecoder, nBatch, *cellScratch);
176  }
177 
178  VectorVectorCwiseProduct(*forgetGateScratchDecoder, *cellStateIn, nBatch * nCell, *cellStateOut);
179 
180  ActivationFunction armnnActivationFunc = ActivationFunction::Sigmoid;
181  float a = 0;
182  float b = 0;
183  SetActivationParameters(descriptor.m_ActivationFunc, armnnActivationFunc, a, b);
184 
185  if (descriptor.m_ActivationFunc > 0)
186  {
187  Activation(*cellScratchDecoder, *cellScratch,
188  TensorInfo({nCell, nBatch}, outputType),
189  armnnActivationFunc, a, b);
190  }
191  if (useCifg)
192  {
193  Sub1Vector(*forgetGateScratchDecoder, nBatch * nCell, *forgetGateScratch);
194  VectorVectorCwiseProductAccumulate(
195  *cellScratchDecoder, *forgetGateScratchDecoder, nBatch * nCell, *cellStateOut);
196  }
197  else
198  {
199  VectorVectorCwiseProductAccumulate(
200  *cellScratchDecoder, *inputGateScratchDecoder, nBatch * nCell, *cellStateOut);
201  }
202  if (descriptor.m_ClippingThresCell > 0.0)
203  {
204  ClipVector(*cellStateOutDecoder, nBatch * nCell, descriptor.m_ClippingThresCell, *cellStateOut);
205  }
206 
207  // For each batch and cell: update the output gate.
208  if (usePeephole)
209  {
210  VectorBatchVectorCwiseProductAccumulate(*cellToOutputWeightsTensor,
211  nCell, *cellStateOutDecoder, nBatch, *outputGateScratch);
212  }
213  if (useLayerNorm)
214  {
215  MeanStddevNormalization(*outputGateScratchDecoder,
216  *outputGateScratch, nCell, nBatch, layerNormEpsilon);
217  VectorBatchVectorCwiseProduct(*outputLayerNormWeights,
218  nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
219  VectorBatchVectorAdd(*outputGateBiasTensor,
220  nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
221  }
222  Activation(*outputGateScratchDecoder, *outputGateScratch,
223  TensorInfo({nCell, nBatch}, outputType),
224  ActivationFunction::Sigmoid, 0, 0);
225 
226  if (descriptor.m_ActivationFunc > 0)
227  {
228  Activation(*cellStateOutDecoder, *cellScratch,
229  TensorInfo({nCell, nBatch}, outputType),
230  armnnActivationFunc, a, b);
231  }
232 
233  VectorVectorCwiseProduct(*outputGateScratchDecoder, *cellScratchDecoder, nBatch * nCell, *outputGateScratch);
234 
235  // For each batch: update the projection and output_state.
236  if (descriptor.m_ProjectionEnabled)
237  {
238  if (projectionBiasTensor)
239  {
240  VectorBatchVectorAssign(*projectionBiasTensor,
241  nOutput, nBatch, *output);
242  }
243  MatrixBatchVectorMultiplyAccumulate(*projectionWeightsTensor,
244  nOutput, nCell, *outputGateScratchDecoder, nBatch, *output);
245 
246  if (descriptor.m_ClippingThresProj > 0.0)
247  {
248  ClipVector(*outputDecoder, nBatch * nOutput, descriptor.m_ClippingThresProj, *output);
249  }
250  }
251  else
252  {
253  CopyVector(*outputGateScratchDecoder, nBatch * nOutput, *output);
254  }
255 
256  CopyVector(*outputDecoder, nBatch * nOutput, *outputStateOut);
257 }
void MeanStddevNormalization(armnn::Decoder< float > &input_vector, armnn::Encoder< float > &output_vector, uint32_t v_size, uint32_t n_batch, float normalization_epsilon)
Definition: LstmUtils.cpp:40
void VectorBatchVectorAdd(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:16
void ClipVector(armnn::Decoder< float > &vector, uint32_t vSize, float absLimit, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:229
void Sub1Vector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &result)
Definition: LstmUtils.cpp:173
void CopyVector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:244
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:131
void ZeroVector(armnn::Encoder< float > &vector, uint32_t vSize)
Definition: LstmUtils.cpp:76
void VectorVectorCwiseProduct(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:187
void VectorBatchVectorCwiseProduct(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:152
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder< float > &matrix, uint32_t mRows, uint32_t mCols, armnn::Decoder< float > &vector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:87
DataType
Definition: Types.hpp:48
float Activation(float in, ActivationFunction function, float a, float b)
Definition: Activation.cpp:13
void VectorVectorCwiseProductAccumulate(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:204
void VectorBatchVectorAssign(armnn::Decoder< float > &vector, uint32_t vSize, uint32_t nBatch, armnn::Encoder< float > &outBatchVector)
Definition: LstmUtils.cpp:113
void SetActivationParameters(uint32_t activation, armnn::ActivationFunction &outArmnnActivation, float &outA, float &outB)
Definition: LstmUtils.cpp:258
ActivationFunction
Definition: Types.hpp:86
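
As a hedged summary in standard LSTM notation (not quoted from the source), the path without layer normalisation evaluates the usual gate equations; the bracketed peephole terms apply only when m_PeepholeEnabled is set, i_t is replaced by 1 - f_t when CIFG is enabled, g is the activation selected by m_ActivationFunc, and the projection and clipping steps at the end are optional:

i_t = \sigma(W_{xi} x_t + W_{hi} h_{t-1} [+ w_{ci} \odot c_{t-1}] + b_i)
f_t = \sigma(W_{xf} x_t + W_{hf} h_{t-1} [+ w_{cf} \odot c_{t-1}] + b_f)
g_t = g(W_{xc} x_t + W_{hc} h_{t-1} + b_c)
c_t = f_t \odot c_{t-1} + i_t \odot g_t
o_t = \sigma(W_{xo} x_t + W_{ho} h_{t-1} [+ w_{co} \odot c_t] + b_o)
h_t = o_t \odot g(c_t)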

◆ MakeDecoder() [1/4]

std::unique_ptr<Decoder<T> > armnn::MakeDecoder ( const TensorInfo info,
const void *  data = nullptr 
)
inline

Definition at line 66 of file Decoders.hpp.

References ARMNN_ASSERT_MSG, BFloat16, Boolean, Float16, Float32, TensorInfo::GetDataType(), armnnUtils::GetPerAxisParams(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::HasPerAxisQuantization(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

67 {
68  switch(info.GetDataType())
69  {
70  case DataType::QAsymmS8:
71  {
72  return std::make_unique<QASymmS8Decoder>(
73  static_cast<const int8_t*>(data),
74  info.GetQuantizationScale(),
75  info.GetQuantizationOffset());
76  }
77  case DataType::QAsymmU8:
78  {
79  return std::make_unique<QASymm8Decoder>(
80  static_cast<const uint8_t*>(data),
81  info.GetQuantizationScale(),
82  info.GetQuantizationOffset());
83  }
84  case DataType::QSymmS16:
85  {
86  return std::make_unique<QSymm16Decoder>(
87  static_cast<const int16_t*>(data),
88  info.GetQuantizationScale(),
89  info.GetQuantizationOffset());
90  }
91  case DataType::BFloat16:
92  {
93  return std::make_unique<BFloat16Decoder>(static_cast<const BFloat16*>(data));
94  }
95  case DataType::Float16:
96  {
97  return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
98  }
99  case DataType::Float32:
100  {
101  return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
102  }
103  case DataType::Signed32:
104  {
105  return MakeSigned32Decoder(info, data);
106  }
107  case DataType::QSymmS8:
108  {
109  if (info.HasPerAxisQuantization())
110  {
111  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
112  return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
113  }
114  else
115  {
116  return std::make_unique<QSymmS8Decoder>(
117  static_cast<const int8_t*>(data),
118  info.GetQuantizationScale(),
119  info.GetQuantizationOffset());
120  }
121  }
122  case armnn::DataType::Boolean:
123  {
124  return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
125  }
126  default:
127  {
128  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
129  break;
130  }
131  }
132  return nullptr;
133 }
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
half_float::half Half
Definition: Half.hpp:18
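
A hedged usage sketch: tensorInfo and rawData are assumed names for an existing TensorInfo and the raw buffer it describes; the position-then-Get() pattern is the same one used by LogSoftmax() above.

// tensorInfo/rawData are assumed inputs, not part of this API.
std::unique_ptr<armnn::Decoder<float>> decoder = armnn::MakeDecoder<float>(tensorInfo, rawData);
for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i)
{
    (*decoder)[i];                // move the decoder to element i
    float value = decoder->Get(); // element i converted/dequantized to float
}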

◆ MakeDecoder() [2/4]

std::unique_ptr<Decoder<float> > armnn::MakeDecoder ( const TensorInfo info,
const void *  data 
)
inline

Definition at line 66 of file Decoders.hpp.

References ARMNN_ASSERT_MSG, BFloat16, Boolean, Float16, Float32, TensorInfo::GetDataType(), armnnUtils::GetPerAxisParams(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::HasPerAxisQuantization(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

67 {
68  switch(info.GetDataType())
69  {
70  case DataType::QAsymmS8:
71  {
72  return std::make_unique<QASymmS8Decoder>(
73  static_cast<const int8_t*>(data),
74  info.GetQuantizationScale(),
75  info.GetQuantizationOffset());
76  }
77  case DataType::QAsymmU8:
78  {
79  return std::make_unique<QASymm8Decoder>(
80  static_cast<const uint8_t*>(data),
81  info.GetQuantizationScale(),
82  info.GetQuantizationOffset());
83  }
84  case DataType::QSymmS16:
85  {
86  return std::make_unique<QSymm16Decoder>(
87  static_cast<const int16_t*>(data),
88  info.GetQuantizationScale(),
89  info.GetQuantizationOffset());
90  }
91  case DataType::BFloat16:
92  {
93  return std::make_unique<BFloat16Decoder>(static_cast<const BFloat16*>(data));
94  }
95  case DataType::Float16:
96  {
97  return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
98  }
99  case DataType::Float32:
100  {
101  return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
102  }
103  case DataType::Signed32:
104  {
105  return MakeSigned32Decoder(info, data);
106  }
107  case DataType::QSymmS8:
108  {
109  if (info.HasPerAxisQuantization())
110  {
111  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
112  return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
113  }
114  else
115  {
116  return std::make_unique<QSymmS8Decoder>(
117  static_cast<const int8_t*>(data),
118  info.GetQuantizationScale(),
119  info.GetQuantizationOffset());
120  }
121  }
122  case armnn::DataType::Boolean:
123  {
124  return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
125  }
126  default:
127  {
128  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
129  break;
130  }
131  }
132  return nullptr;
133 }
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
half_float::half Half
Definition: Half.hpp:18

◆ MakeDecoder() [3/4]

std::unique_ptr<Decoder<bool> > armnn::MakeDecoder ( const TensorInfo info,
const void *  data 
)
inline

Definition at line 136 of file Decoders.hpp.

References ARMNN_ASSERT_MSG, Boolean, and TensorInfo::GetDataType().

137 {
138  switch(info.GetDataType())
139  {
140  case DataType::Boolean:
141  {
142  return std::make_unique<BooleanDecoderBool>(static_cast<const uint8_t*>(data));
143  }
144  default:
145  {
146  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
147  break;
148  }
149  }
150  return nullptr;
151 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ MakeDecoder() [4/4]

std::unique_ptr<Decoder<int32_t> > armnn::MakeDecoder ( const TensorInfo info,
const void *  data 
)
inline

Definition at line 154 of file Decoders.hpp.

References ARMNN_ASSERT_MSG, TensorInfo::GetDataType(), and Signed32.

155 {
156  switch(info.GetDataType())
157  {
158  case DataType::Signed32:
159  {
160  return std::make_unique<Int32ToInt32tDecoder>(static_cast<const int32_t*>(data));
161  }
162  default:
163  {
164  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
165  break;
166  }
167  }
168  return nullptr;
169 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ MakeEncoder() [1/4]

std::unique_ptr<Encoder<T> > armnn::MakeEncoder ( const TensorInfo info,
void *  data = nullptr 
)
inline

Definition at line 21 of file Encoders.hpp.

References ARMNN_ASSERT_MSG, BFloat16, Boolean, Float16, Float32, TensorInfo::GetDataType(), armnnUtils::GetPerAxisParams(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::HasPerAxisQuantization(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

22 {
23  switch(info.GetDataType())
24  {
25  case armnn::DataType::QAsymmS8:
26  {
27  return std::make_unique<QASymmS8Encoder>(
28  static_cast<int8_t*>(data),
29  info.GetQuantizationScale(),
30  info.GetQuantizationOffset());
31  }
32  case armnn::DataType::QAsymmU8:
33  {
34  return std::make_unique<QASymm8Encoder>(
35  static_cast<uint8_t*>(data),
36  info.GetQuantizationScale(),
37  info.GetQuantizationOffset());
38  }
39  case DataType::QSymmS8:
40  {
41  if (info.HasPerAxisQuantization())
42  {
43  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
44  return std::make_unique<QSymm8PerAxisEncoder>(
45  static_cast<int8_t*>(data),
46  params.second,
47  params.first);
48  }
49  else
50  {
51  return std::make_unique<QSymmS8Encoder>(
52  static_cast<int8_t*>(data),
53  info.GetQuantizationScale(),
54  info.GetQuantizationOffset());
55  }
56  }
57  case armnn::DataType::QSymmS16:
58  {
59  return std::make_unique<QSymm16Encoder>(
60  static_cast<int16_t*>(data),
61  info.GetQuantizationScale(),
62  info.GetQuantizationOffset());
63  }
64  case armnn::DataType::Signed32:
65  {
66  return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
67  }
68  case armnn::DataType::BFloat16:
69  {
70  return std::make_unique<BFloat16Encoder>(static_cast<armnn::BFloat16*>(data));
71  }
72  case armnn::DataType::Float16:
73  {
74  return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
75  }
76  case armnn::DataType::Float32:
77  {
78  return std::make_unique<Float32Encoder>(static_cast<float*>(data));
79  }
80  default:
81  {
82  ARMNN_ASSERT_MSG(false, "Unsupported target Data Type!");
83  break;
84  }
85  }
86  return nullptr;
87 }
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
half_float::half Half
Definition: Half.hpp:18
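
A typical usage pattern, mirroring the element-wise copy in MirrorPad() below, pairs a Decoder with an Encoder so that tensor data can be read and written independently of the underlying DataType. A minimal sketch (the TensorInfo objects and mapped ITensorHandle pointers are assumed to come from the surrounding workload):

    std::unique_ptr<armnn::Decoder<float>> decoder = armnn::MakeDecoder<float>(inputInfo, inputHandle->Map());
    std::unique_ptr<armnn::Encoder<float>> encoder = armnn::MakeEncoder<float>(outputInfo, outputHandle->Map());

    for (unsigned int i = 0; i < inputInfo.GetNumElements(); ++i)
    {
        (*decoder)[i];                // position the decoder on element i
        (*encoder)[i];                // position the encoder on element i
        encoder->Set(decoder->Get()); // read as float, write as float (de/requantizing if needed)
    }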

◆ MakeEncoder() [2/4]

std::unique_ptr<Encoder<float> > armnn::MakeEncoder ( const TensorInfo info,
void *  data 
)
inline

Definition at line 21 of file Encoders.hpp.

References ARMNN_ASSERT_MSG, BFloat16, Float16, Float32, TensorInfo::GetDataType(), armnnUtils::GetPerAxisParams(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::HasPerAxisQuantization(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

22 {
23  switch(info.GetDataType())
24  {
 25  case DataType::QAsymmS8:
 26  {
27  return std::make_unique<QASymmS8Encoder>(
28  static_cast<int8_t*>(data),
29  info.GetQuantizationScale(),
30  info.GetQuantizationOffset());
31  }
 32  case DataType::QAsymmU8:
 33  {
34  return std::make_unique<QASymm8Encoder>(
35  static_cast<uint8_t*>(data),
36  info.GetQuantizationScale(),
37  info.GetQuantizationOffset());
38  }
39  case DataType::QSymmS8:
40  {
41  if (info.HasPerAxisQuantization())
42  {
43  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
44  return std::make_unique<QSymm8PerAxisEncoder>(
45  static_cast<int8_t*>(data),
46  params.second,
47  params.first);
48  }
49  else
50  {
51  return std::make_unique<QSymmS8Encoder>(
52  static_cast<int8_t*>(data),
53  info.GetQuantizationScale(),
54  info.GetQuantizationOffset());
55  }
56  }
 57  case DataType::QSymmS16:
 58  {
59  return std::make_unique<QSymm16Encoder>(
60  static_cast<int16_t*>(data),
61  info.GetQuantizationScale(),
62  info.GetQuantizationOffset());
63  }
 64  case DataType::Signed32:
 65  {
66  return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
67  }
 68  case DataType::BFloat16:
 69  {
70  return std::make_unique<BFloat16Encoder>(static_cast<armnn::BFloat16*>(data));
71  }
 72  case DataType::Float16:
 73  {
74  return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
75  }
 76  case DataType::Float32:
 77  {
78  return std::make_unique<Float32Encoder>(static_cast<float*>(data));
79  }
80  default:
81  {
82  ARMNN_ASSERT_MSG(false, "Unsupported target Data Type!");
83  break;
84  }
85  }
86  return nullptr;
87 }
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
half_float::half Half
Definition: Half.hpp:18

◆ MakeEncoder() [3/4]

std::unique_ptr<Encoder<bool> > armnn::MakeEncoder ( const TensorInfo info,
void *  data 
)
inline

Definition at line 90 of file Encoders.hpp.

References ARMNN_ASSERT_MSG, Boolean, and TensorInfo::GetDataType().

91 {
92  switch(info.GetDataType())
93  {
 94  case DataType::Boolean:
 95  {
96  return std::make_unique<BooleanEncoder>(static_cast<uint8_t*>(data));
97  }
98  default:
99  {
100  ARMNN_ASSERT_MSG(false, "Cannot encode from boolean. Not supported target Data Type!");
101  break;
102  }
103  }
104  return nullptr;
105 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ MakeEncoder() [4/4]

std::unique_ptr<Encoder<int32_t> > armnn::MakeEncoder ( const TensorInfo info,
void *  data 
)
inline

Definition at line 108 of file Encoders.hpp.

References ARMNN_ASSERT_MSG, TensorInfo::GetDataType(), and Signed32.

109 {
110  switch(info.GetDataType())
111  {
112  case DataType::Signed32:
113  {
114  return std::make_unique<Int32ToInt32tEncoder>(static_cast<int32_t*>(data));
115  }
116  default:
117  {
118  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
119  break;
120  }
121  }
122  return nullptr;
123 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ MakeInfo()

arm_compute::DetectionPostProcessLayerInfo armnn::MakeInfo ( const DetectionPostProcessDescriptor descriptor)

Definition at line 17 of file NeonDetectionPostProcessWorkload.cpp.

References DetectionPostProcessDescriptor::m_DetectionsPerClass, DetectionPostProcessDescriptor::m_MaxClassesPerDetection, DetectionPostProcessDescriptor::m_MaxDetections, DetectionPostProcessDescriptor::m_NmsIouThreshold, DetectionPostProcessDescriptor::m_NmsScoreThreshold, DetectionPostProcessDescriptor::m_NumClasses, and DetectionPostProcessDescriptor::m_UseRegularNms.

Referenced by NeonDetectionPostProcessValidate().

18 {
19  return arm_compute::DetectionPostProcessLayerInfo(descriptor.m_MaxDetections,
20  descriptor.m_MaxClassesPerDetection,
21  descriptor.m_NmsScoreThreshold,
22  descriptor.m_NmsIouThreshold,
23  descriptor.m_NumClasses,
24  { descriptor.m_ScaleX,
25  descriptor.m_ScaleY,
26  descriptor.m_ScaleW,
27  descriptor.m_ScaleH },
28  descriptor.m_UseRegularNms,
29  descriptor.m_DetectionsPerClass);
30 }

◆ MakeOptimizations()

Optimizer::Optimizations armnn::MakeOptimizations ( Args &&...  args)

Definition at line 43 of file Optimizer.hpp.

References Append().

Referenced by ApplyBackendOptimizations(), Optimize(), and TEST_SUITE().

44 {
45  Optimizer::Optimizations optimizations;
46 
47  Append(optimizations, std::forward<Args>(args)...);
48 
49  return optimizations;
50 }
void Append(Optimizer::Optimizations &optimizations, Front &&front, Others &&... others)
Definition: Optimizer.hpp:36
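
For example, Optimize() uses this helper to bundle several optimization passes into a single Optimizer::Pass() call over a Graph. A minimal sketch (the two pass names are illustrative picks from the armnn::optimizations namespace; any set of compatible passes can be combined the same way):

    using namespace armnn;
    using namespace armnn::optimizations;

    // Run two graph rewrites in one traversal of 'graph'.
    Optimizer::Pass(graph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                             OptimizeInversePermutes()));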

◆ MakeOptional()

Optional<T> armnn::MakeOptional ( Args &&...  args)

Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object.

Definition at line 305 of file Optional.hpp.

References CONSTRUCT_IN_PLACE.

306 {
307  return Optional<T>(CONSTRUCT_IN_PLACE, std::forward<Args>(args)...);
308 }
#define CONSTRUCT_IN_PLACE
Definition: Optional.hpp:41
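
For instance, an optional bias TensorInfo can be constructed directly inside the Optional instead of being copied in (a minimal sketch; the shape and data type are arbitrary):

    // Construct the TensorInfo in place, avoiding a temporary object.
    armnn::Optional<armnn::TensorInfo> biases =
        armnn::MakeOptional<armnn::TensorInfo>(armnn::TensorShape({ 16 }), armnn::DataType::Float32);

    if (biases.has_value())
    {
        const armnn::TensorInfo& biasInfo = biases.value();
        // ... use biasInfo ...
    }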

◆ MakeTransformIterator()

constexpr TransformIterator<Function, Iterator> armnn::MakeTransformIterator ( Iterator  i,
Function  f 
)

Definition at line 81 of file TransformIterator.hpp.

Referenced by TEST_SUITE().

82 {
83  return TransformIterator<Function, Iterator>(i, f);
84 }
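
A minimal usage sketch, assuming the returned TransformIterator supports the usual increment, comparison and dereference operations (dereferencing applies the supplied function to the underlying element):

    std::vector<int> values = { 1, 2, 3 };
    auto square = [](int v) { return v * v; };

    // Iterate over the vector while squaring each element on the fly.
    auto first = armnn::MakeTransformIterator(values.begin(), square);
    auto last  = armnn::MakeTransformIterator(values.end(), square);

    int sum = 0;
    for (auto it = first; it != last; ++it)
    {
        sum += *it; // accumulates 1 + 4 + 9 = 14
    }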

◆ MirrorPad()

void MirrorPad ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const ITensorHandle inputHandle,
ITensorHandle outputHandle,
const PadQueueDescriptor data 
)

Definition at line 59 of file MirrorPad.cpp.

References TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), PadDescriptor::m_PaddingMode, PadDescriptor::m_PadList, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), Reflect, Encoder< IType >::Set(), and Symmetric.

Referenced by RefPadWorkload::ExecuteAsync().

64 {
65  auto padList = data.m_Parameters.m_PadList;
66  PaddingMode paddingMode = data.m_Parameters.m_PaddingMode;
67 
68  TensorShape outputShape = outputInfo.GetShape();
69  TensorShape inputShape = inputInfo.GetShape();
70 
71  unsigned int numOutputElements = outputInfo.GetNumElements();
72  unsigned int numInputDimensions = inputShape.GetNumDimensions();
73  assert(numInputDimensions == outputShape.GetNumDimensions());
74 
75  // If padding mode is Reflect then both paddings must be no greater than inputShape(i) - 1.
76  // If padding mode is Symmetric then both paddings must be no greater than inputShape(i).
77  const unsigned int isReflect = static_cast<unsigned int>(paddingMode == PaddingMode::Reflect);
78  for(unsigned int i = 0; i < padList.size(); ++i)
79  {
80  if(padList.at(i).first > (inputShape[i] - isReflect) ||
81  padList.at(i).second > (inputShape[i] - isReflect))
82  {
83  throw armnn::InvalidArgumentException("Paddings must be less (Reflect) or "
84  "equal (Symmetric) to the dimension size.");
85  }
86  }
87 
88  auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
89  auto outData = MakeEncoder<float>(outputInfo, outputHandle->Map());
90 
91  Decoder<float>& input = *inputData;
92  Encoder<float>& output = *outData;
93 
94  for(unsigned int idx = 0; idx < numOutputElements; ++idx)
95  {
 96  // Get the coordinates of the current index in vector form. E.g. idx 1 = [ 0, 0, 0, 1 ]
97  const std::vector<unsigned int> coord = IndexToCoord(outputShape, idx);
98 
99  std::vector<unsigned int> dimensions;
100  std::vector<unsigned int> coords;
101 
102  for(unsigned int i = 0; i < numInputDimensions; ++i)
103  {
104  dimensions.emplace_back(i);
105  coords.emplace_back(coord[i]);
106  }
107 
108  auto isInPadding = [&](unsigned int i)
109  {
110  return (coords[i] < padList[i].first || coords[i] > inputShape[i] + padList[i].first - 1);
111  };
112 
113  auto getReflectIndex = [&](unsigned int i) -> unsigned int
114  {
115  if(isInPadding(i))
116  {
117  if(coords[i] < padList[i].first)
118  {
119  return padList[i].first - coords[i];
120  }
121  else
122  {
123  return 2 * inputShape[i] + padList[i].first - 2 - coords[i];
124  }
125  }
126  return coords[i] - padList[i].first;
127  };
128 
129  auto getSymmetricIndex = [&](unsigned int i) -> unsigned int
130  {
131  if(isInPadding(i))
132  {
133  if(coords[i] < padList[i].first)
134  {
135  return padList[i].first - coords[i] - 1;
136  }
137  else
138  {
139  return 2 * inputShape[i] + padList[i].first - 1 - coords[i];
140  }
141  }
142  return coords[i] - padList[i].first;
143  };
144 
145  // Location of the value in the input tensor to use in the output.
146  std::vector<unsigned int> coordOfInput;
147 
148  // any_of works as a loop here to check if any of the dimensions are in the padding.
 149  // If a dimension is in the padding area, then create the coordinates of the location in the
150  // input tensor to use in the output.
151  // E.g.
152  // Input tensor = [ 1, 2, 3 ], Rank = 1.
 153  // Output tensor = [ 2, 1, 2, 3, 2 ] if Reflect or [ 1, 1, 2, 3, 3 ] if Symmetric with a padding of (1, 1).
154  // So it will either return [ 1 ] or [ 0 ] which is used to set the first value in the output tensor and so on.
155  if(std::any_of(dimensions.begin(), dimensions.end(), isInPadding))
156  {
157  switch(paddingMode)
158  {
159  case PaddingMode::Reflect:
160  {
161  for(unsigned int i = 0; i < numInputDimensions; ++i)
162  {
163  coordOfInput.emplace_back(getReflectIndex(i));
164  }
165  break;
166  }
167  case PaddingMode::Symmetric:
168  {
169  for(unsigned int i = 0; i < numInputDimensions; ++i)
170  {
171  coordOfInput.emplace_back(getSymmetricIndex(i));
172  }
173  break;
174  }
175  default:
176  throw InvalidArgumentException("Padding mode not supported.");
177  break;
178  }
179  }
180  else
181  {
182  for(unsigned int i = 0; i < numInputDimensions; ++i)
183  {
184  coordOfInput.emplace_back(coord[i] - padList[i].first);
185  }
186  }
187 
188  // Set output value using the coordinate of the input value to use.
189  const unsigned int indexOfInput = CoordToIndex(inputShape, coordOfInput);
190 
191  input[indexOfInput];
192  auto inputValue = input.Get();
193 
194  output[idx];
195  output.Set(inputValue);
196  }
197 }
PaddingMode
The padding mode controls whether the padding should be filled with constant values (Constant)...
Definition: Types.hpp:186
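
To make the reflect/symmetric index arithmetic above concrete, the following stand-alone sketch reproduces the one-dimensional example from the comments (input { 1, 2, 3 } padded by (1, 1)). The helper is purely illustrative and is not part of the ArmNN API:

    // Hypothetical helper replicating the per-dimension index maths used in MirrorPad().
    unsigned int PaddedSourceIndex(unsigned int outIdx, unsigned int pad,
                                   unsigned int inputSize, bool reflect)
    {
        if (outIdx < pad)                    // left padding region
        {
            return reflect ? pad - outIdx : pad - outIdx - 1;
        }
        if (outIdx > inputSize + pad - 1)    // right padding region
        {
            return reflect ? 2 * inputSize + pad - 2 - outIdx
                           : 2 * inputSize + pad - 1 - outIdx;
        }
        return outIdx - pad;                 // inside the original tensor
    }

    // For input { 1, 2, 3 } and a padding of (1, 1):
    //   Reflect   -> source indices 1, 0, 1, 2, 1 -> output { 2, 1, 2, 3, 2 }
    //   Symmetric -> source indices 0, 0, 1, 2, 2 -> output { 1, 1, 2, 3, 3 }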

◆ MockBackendId()

constexpr const char* armnn::MockBackendId ( )

Definition at line 11 of file MockBackendId.hpp.

Referenced by MockBackend::GetIdStatic(), MockBackend::OptimizeSubgraphView(), and TEST_SUITE().

11 { return "MockAcc"; }

◆ MockImportBackendId()

constexpr const char* armnn::MockImportBackendId ( )

Definition at line 12 of file MockImportBackend.hpp.

Referenced by MockImportBackend::GetIdStatic(), and TEST_SUITE().

12 { return "MockRef"; }

◆ MockTensorHandleFactoryId()

constexpr const char* armnn::MockTensorHandleFactoryId ( )

Definition at line 14 of file MockTensorHandleFactory.hpp.

Referenced by MockTensorHandleFactory::GetIdStatic().

15 {
16  return "Arm/Mock/TensorHandleFactory";
17 }

◆ NeonAbsWorkloadValidate()

arm_compute::Status NeonAbsWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonAbsWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEAbsLayer::validate(&aclInput, &aclOutput);
23 }
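
The Neon*WorkloadValidate() functions in this namespace all follow the same pattern: build arm_compute::TensorInfo descriptions of the ArmNN tensors, then delegate to the static validate() of the corresponding NE* layer. Callers such as the IsXSupported() checks typically only need the resulting status code; a minimal sketch of such a wrapper (the function name and out-parameter are illustrative):

    // Hypothetical wrapper: report whether the Neon Abs workload would accept these tensors.
    bool AbsSupportedOnNeon(const armnn::TensorInfo& input,
                            const armnn::TensorInfo& output,
                            std::string* reasonIfUnsupported)
    {
        arm_compute::Status status = armnn::NeonAbsWorkloadValidate(input, output);

        const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!supported && reasonIfUnsupported != nullptr)
        {
            *reasonIfUnsupported = status.error_description();
        }
        return supported;
    }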

◆ NeonActivationWorkloadValidate()

arm_compute::Status NeonActivationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ActivationDescriptor descriptor 
)

Definition at line 17 of file NeonActivationWorkload.cpp.

Referenced by NeonLayerSupport::IsActivationSupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  const arm_compute::ActivationLayerInfo activationLayerInfo =
 25  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
 26 
27  return arm_compute::NEActivationLayer::validate(&aclInput,
28  &aclOutput,
29  activationLayerInfo);
30 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonAdditionWorkloadValidate()

arm_compute::Status NeonAdditionWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 20 of file NeonAdditionWorkload.cpp.

Referenced by NeonLayerSupport::IsAdditionSupported(), and NeonBackend::OptimizeSubgraphView().

24 {
25  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
26  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
27  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
28 
29  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
30  activationDescriptor);
31 
32  return arm_compute::NEArithmeticAddition::validate(&aclInput0,
33  &aclInput1,
34  &aclOutput,
35  arm_compute::ConvertPolicy::SATURATE,
36  activationInfo);
37 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonArgMinMaxWorkloadValidate()

arm_compute::Status NeonArgMinMaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ArgMinMaxDescriptor descriptor 
)

Definition at line 31 of file NeonArgMinMaxWorkload.cpp.

Referenced by NeonLayerSupport::IsArgMinMaxSupported().

34 {
35  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
36  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
37 
38  auto numDims = input.GetNumDimensions();
39  auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
40  int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
41 
42  if (descriptor.m_Function == ArgMinMaxFunction::Max)
43  {
44  return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
45  arm_compute::ReductionOperation::ARG_IDX_MAX);
46  }
47  else
48  {
49  return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
50  arm_compute::ReductionOperation::ARG_IDX_MIN);
51  }
52 }
unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35

◆ NeonBackendId()

constexpr const char* armnn::NeonBackendId ( )

Definition at line 10 of file NeonBackendId.hpp.

Referenced by NeonBackend::GetIdStatic().

10 { return "CpuAcc"; }

◆ NeonBatchNormalizationValidate()

arm_compute::Status NeonBatchNormalizationValidate ( const TensorInfo input,
const TensorInfo output,
const TensorInfo mean,
const TensorInfo var,
const TensorInfo beta,
const TensorInfo gamma,
const BatchNormalizationDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonBatchNormalizationWorkload.cpp.

Referenced by NeonLayerSupport::IsBatchNormalizationSupported(), and NeonBackend::OptimizeSubgraphView().

32 {
33  const arm_compute::TensorInfo aclInputInfo =
34  armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
35  const arm_compute::TensorInfo aclOutputInfo =
36  armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
37  const arm_compute::TensorInfo aclMeanInfo =
38  armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
39  const arm_compute::TensorInfo aclVarInfo =
40  armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
41  const arm_compute::TensorInfo aclBetaInfo =
42  armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
43  const arm_compute::TensorInfo aclGammaInfo =
44  armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
45 
46  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
47  activationDescriptor);
48 
49  return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo,
50  &aclOutputInfo,
51  &aclMeanInfo,
52  &aclVarInfo,
53  &aclBetaInfo,
54  &aclGammaInfo,
55  descriptor.m_Eps,
56  activationInfo);
57 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonBatchToSpaceNdWorkloadValidate()

arm_compute::Status NeonBatchToSpaceNdWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const BatchToSpaceNdDescriptor descriptor 
)

Definition at line 20 of file NeonBatchToSpaceNdWorkload.cpp.

Referenced by NeonLayerSupport::IsBatchToSpaceNdSupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
25  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
26 
27  // ArmNN blockShape is [H, W] Cl asks for W, H
28  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
29  int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
30 
31  const arm_compute::Status aclStatus = arm_compute::NEBatchToSpaceLayer::validate(&aclInputInfo,
32  blockWidth,
33  blockHeight,
34  &aclOutputInfo);
35  return aclStatus;
36 }
Status
enumeration
Definition: Types.hpp:42
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
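
As the comment in the listing notes, the descriptor stores the block shape as [H, W] while the ACL layer takes width and height as separate arguments, so the two values are deliberately swapped. For example (values are arbitrary):

    // Hypothetical descriptor with a block of height 2 and width 4.
    armnn::BatchToSpaceNdDescriptor descriptor;
    descriptor.m_BlockShape = { 2, 4 };  // stored as [H, W]

    // Inside the validate function this becomes blockHeight = 2 and blockWidth = 4,
    // and NEBatchToSpaceLayer::validate() is called with (input, blockWidth, blockHeight, output).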

◆ NeonCastValidate()

arm_compute::Status NeonCastValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file NeonCastWorkload.cpp.

Referenced by NeonLayerSupport::IsCastSupported().

20 {
21  arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::NECast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
25 }

◆ NeonChannelShuffleValidate()

arm_compute::Status NeonChannelShuffleValidate ( const TensorInfo input,
const TensorInfo output,
const ChannelShuffleDescriptor descriptor 
)

Definition at line 17 of file NeonChannelShuffleWorkload.cpp.

Referenced by NeonLayerSupport::IsChannelShuffleSupported().

20 {
21  arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  // In Arm NN and in NNAPI, channel shuffle implementation is datalayout agnostic and it has axis as a parameter.
25  // The channel shuffle Implementation for Neon is dependent on datalayout and does not have axis as a parameter,
26  // it only supports channel shuffle for 4D tensors in dimension C (1 or 3).
27  arm_compute::DataLayout aclDataLayout;
28  if (input.GetNumDimensions() == 4)
29  {
30  switch (descriptor.m_Axis)
31  {
32  case 1:
33  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
34  break;
35  case 3:
36  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
37  break;
38  default:
39  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
40  }
41  aclInputInfo.set_data_layout(aclDataLayout);
42  aclOutputInfo.set_data_layout(aclDataLayout);
43  return arm_compute::NEChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
44  }
45  else
46  {
47  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
48  }
49 }
DataLayout
Definition: Types.hpp:62
Status
enumeration
Definition: Types.hpp:42

◆ NeonComparisonWorkloadValidate()

arm_compute::Status NeonComparisonWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ComparisonDescriptor descriptor 
)

Definition at line 16 of file NeonComparisonWorkload.cpp.

Referenced by NeonLayerSupport::IsComparisonSupported().

20 {
21  const arm_compute::TensorInfo aclInput0 = BuildArmComputeTensorInfo(input0);
22  const arm_compute::TensorInfo aclInput1 = BuildArmComputeTensorInfo(input1);
23  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
24 
25  const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
26 
27  const arm_compute::Status aclStatus = arm_compute::NEElementwiseComparison::validate(&aclInput0,
28  &aclInput1,
29  &aclOutput,
30  comparisonOperation);
31  return aclStatus;
32 }
ComparisonOperation
Definition: Types.hpp:108
Status
enumeration
Definition: Types.hpp:42
arm_compute::ComparisonOperation ConvertComparisonOperationToAcl(const ComparisonDescriptor &descriptor)

◆ NeonConcatWorkloadValidate()

arm_compute::Status NeonConcatWorkloadValidate ( const std::vector< const TensorInfo *> &  inputs,
const TensorInfo output,
const OriginsDescriptor descriptor 
)

Definition at line 27 of file NeonConcatWorkload.cpp.

Referenced by NeonLayerSupport::IsConcatSupported().

31 {
32  std::vector<arm_compute::TensorInfo> aclInputs;
33  for (const TensorInfo* input : inputs)
34  {
35  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
36  aclInputs.emplace_back(aclInputInfo);
37  }
38  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
39  std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
40  for (arm_compute::ITensorInfo& input : aclInputs)
41  {
42  aclInputPtrs.emplace_back(&input);
43  }
44 
45  size_t aclAxis = CalcAxis(descriptor);
46  return arm_compute::NEConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
47 }

◆ NeonConstantWorkloadValidate()

arm_compute::Status NeonConstantWorkloadValidate ( const TensorInfo output)

Definition at line 20 of file NeonConstantWorkload.cpp.

Referenced by NeonLayerSupport::IsConstantSupported().

21 {
22  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  std::array<arm_compute::DataType,9> supportedTypes = {
25  arm_compute::DataType::BFLOAT16,
26  arm_compute::DataType::F16,
27  arm_compute::DataType::F32,
28  arm_compute::DataType::QASYMM8,
29  arm_compute::DataType::QASYMM8_SIGNED,
30  arm_compute::DataType::QSYMM16,
31  arm_compute::DataType::QSYMM8,
32  arm_compute::DataType::QSYMM8_PER_CHANNEL,
33  arm_compute::DataType::S32
34  };
35  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
36 
37  if (it != end(supportedTypes))
38  {
39  return arm_compute::Status{};
40  }
41  else
42  {
43  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
44  }
45 }
Status
enumeration
Definition: Types.hpp:42

◆ NeonConvolution2dWorkloadValidate()

arm_compute::Status NeonConvolution2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Convolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonConvolution2dWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by NeonLayerSupport::IsConvolution2dSupported(), and NeonBackend::OptimizeSubgraphView().

31 {
32  // arm_compute::NEConvolutionLayer supports both const and non const
33  // weights. However, in the case of non const weights we'd have to call
34  // prepare or configure for each inference which we're not setup to do just yet.
35  if (!weights.IsConstant())
36  {
37  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
38  "ArmNN NeonConvolution2dWorkload does not support non constant weights."};
39  }
40 
41  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
42  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
43  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
44  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
45 
46  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
47  descriptor.m_DilationY);
48 
49  arm_compute::TensorInfo aclBiasesInfo;
50  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
51 
52  if (descriptor.m_BiasEnabled)
53  {
54  ARMNN_ASSERT(biases.has_value());
55  // Same for bias as weights. We don't currently support non const.
56  if (!biases.value().IsConstant())
57  {
58  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
59  "ArmNN NeonConvolution2dWorkload does not support non constant bias."};
60  }
61  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
62  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
63  optionalAclBiasesInfo = &aclBiasesInfo;
64  }
65 
66  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
67 
68  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
69  activationDescriptor);
70 
71  return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
72  &aclWeightsInfo,
73  optionalAclBiasesInfo,
74  &aclOutputInfo,
75  layerInfo,
76  arm_compute::WeightsInfo(),
77  aclDilationInfo,
78  activationInfo,
79  isFastMathEnabled);
80 }
Status
enumeration
Definition: Types.hpp:42
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonConvolution3dWorkloadValidate()

arm_compute::Status NeonConvolution3dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Convolution3dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonConvolution3dWorkload.cpp.

Referenced by NeonLayerSupport::IsConvolution3dSupported().

31 {
32  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclBiasesInfo;
35  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
36  if (descriptor.m_BiasEnabled)
37  {
38  ARMNN_ASSERT(biases.has_value());
39 
40  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
41  optionalAclBiasesInfo = &aclBiasesInfo;
42  }
43  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
44 
45  const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
46  isFastMathEnabled,
47  activationDescriptor);
48 
49  return arm_compute::NEConv3D::validate(&aclInputInfo,
50  &aclWeightsInfo,
51  optionalAclBiasesInfo,
52  &aclOutputInfo,
53  aclConv3DInfo);
54 }
arm_compute::Conv3dInfo ComputeConv3DInfo(const armnn::Convolution3dDescriptor descriptor, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor...
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ NeonDepthToSpaceWorkloadValidate()

arm_compute::Status NeonDepthToSpaceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const DepthToSpaceDescriptor descriptor 
)

Definition at line 19 of file NeonDepthToSpaceWorkload.cpp.

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsDepthToSpaceSupported().

22 {
23  DataLayout dataLayout = descriptor.m_DataLayout;
24  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
25  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
26 
27  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
28 
29  return arm_compute::NEDepthToSpaceLayer::validate(&aclInput, &aclOutput, blockSize);
30 }
DataLayout
Definition: Types.hpp:62
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35

◆ NeonDepthwiseConvolutionWorkloadValidate()

arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const DepthwiseConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
const ActivationDescriptor activationDescriptor 
)

Definition at line 29 of file NeonDepthwiseConvolutionWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by NeonLayerSupport::IsDepthwiseConvolutionSupported(), NeonLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and NeonBackend::OptimizeSubgraphView().

35 {
36  // The Neon implemented workload does support both const and non const
37  // weights. However, in the case of non const weights we'd have to call
38  // prepare or configure for each inference which we're not setup to do just yet.
39  if (!weights.IsConstant())
40  {
41  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
42  "ArmNN NeonDepthwiseConv2dWorkload does not support non constant weights."};
43  }
44 
45  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
46  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
47 
48  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
49  //
50  // ACL format for weights for depthwise is:
51  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
52  // - [1, C, H, W] for [N, C, H, W] input/output layout
53  //
54  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
55  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
56  // so we do the permute here for the TensorInfo weights.
57  unsigned int aclDepthMultiplier;
58  TensorInfo weightsPermuted;
59  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
60 
61  // Convert the weights into the compute library format
62  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
63  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
64 
65  arm_compute::TensorInfo aclBiasesInfo;
66  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
67  if (descriptor.m_BiasEnabled)
68  {
69  ARMNN_ASSERT(biases.has_value());
70  // Same for bias as weights. We don't currently support non const.
71  if (!biases.value().IsConstant())
72  {
73  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
74  "ArmNN NeonDepthwiseConv2dWorkload does not support non constant bias."};
75  }
76  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
77  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
78  optionalAclBiasesInfo = &aclBiasesInfo;
79  }
80 
81  arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
82  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
83  descriptor.m_DilationX, descriptor.m_DilationY);
84 
85  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
86  activationDescriptor);
87 
88  return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
89  &aclWeightsInfo,
90  optionalAclBiasesInfo,
91  &aclOutputInfo,
92  aclPadStrideInfo,
93  aclDepthMultiplier,
94  activationInfo,
95  aclDilationInfo);
96 }
bool IsConstant() const
Definition: Tensor.cpp:509
bool m_BiasEnabled
Enable/disable bias.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
uint32_t m_DilationY
Dilation factor value for height dimension.
uint32_t m_DilationX
Dilation factor value for width dimension.
bool has_value() const noexcept
Definition: Optional.hpp:53
Status
enumeration
Definition: Types.hpp:42
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl(const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M] This function converts a TensorInfo...
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonDequantizeWorkloadValidate()

arm_compute::Status NeonDequantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 22 of file NeonDequantizeWorkload.cpp.

Referenced by NeonLayerSupport::IsDequantizeSupported().

24 {
25  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
27 
28  return arm_compute::NEDequantizationLayer::validate(&aclInput, &aclOutput);
29 }

◆ NeonDetected()

bool NeonDetected ( )

Definition at line 37 of file Utils.cpp.

38 {
39 #if !defined(ARMNN_BUILD_BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
40  auto hwcaps= getauxval(AT_HWCAP);
41 #endif
42 
43 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__aarch64__)
44 
45  if (hwcaps & HWCAP_ASIMD)
46  {
47  // On an arm64 device with Neon.
48  return true;
49  }
50  else
51  {
52  // On an arm64 device without Neon.
53  return false;
54  }
55 
56 #endif
57 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__arm__)
58 
59  if (hwcaps & HWCAP_NEON)
60  {
61  // On an armhf device with Neon.
62  return true;
63  }
64  else
65  {
66  // On an armhf device without Neon.
67  return false;
68  }
69 
70 #endif
71 
72  // This method of Neon detection is only supported on Linux so in order to prevent a false negative
73  // we will return true in cases where detection did not run.
74  return true;
75 }
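
A typical use is to fall back to the reference backend when Neon is not available on the target device; a minimal sketch (the id strings correspond to NeonBackendId() above and to the CPU reference backend):

    std::vector<armnn::BackendId> backends;

    if (armnn::NeonDetected())
    {
        backends.push_back(armnn::BackendId("CpuAcc")); // Neon-accelerated CPU backend
    }
    backends.push_back(armnn::BackendId("CpuRef"));     // portable reference fallback

    // 'backends' can then be passed as the backend preference list to armnn::Optimize().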

◆ NeonDetectionPostProcessValidate()

arm_compute::Status NeonDetectionPostProcessValidate ( const TensorInfo boxEncodings,
const TensorInfo scores,
const TensorInfo anchors,
const TensorInfo detectionBoxes,
const TensorInfo detectionClasses,
const TensorInfo detectionScores,
const TensorInfo numDetections,
const DetectionPostProcessDescriptor descriptor 
)

Definition at line 32 of file NeonDetectionPostProcessWorkload.cpp.

References info, and MakeInfo().

40 {
41  arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(descriptor);
42 
43  const arm_compute::TensorInfo aclBoxEncodings =
44  armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings);
45 
46  const arm_compute::TensorInfo aclScores =
47  armcomputetensorutils::BuildArmComputeTensorInfo(scores);
48 
49  const arm_compute::TensorInfo aclAnchors =
50  armcomputetensorutils::BuildArmComputeTensorInfo(anchors);
51 
52  arm_compute::TensorInfo aclDetectionBoxes =
53  armcomputetensorutils::BuildArmComputeTensorInfo(detectionBoxes);
54 
55  arm_compute::TensorInfo aclDetectionClasses =
56  armcomputetensorutils::BuildArmComputeTensorInfo(detectionClasses);
57 
58  arm_compute::TensorInfo aclDetectionScores =
59  armcomputetensorutils::BuildArmComputeTensorInfo(detectionScores);
60 
61  arm_compute::TensorInfo aclNumDetections =
62  armcomputetensorutils::BuildArmComputeTensorInfo(numDetections);
63 
64  return arm_compute::NEDetectionPostProcessLayer::validate(
65  &aclBoxEncodings,
66  &aclScores,
67  &aclAnchors,
68  &aclDetectionBoxes,
69  &aclDetectionClasses,
70  &aclDetectionScores,
71  &aclNumDetections,
72  info);
73 }
arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor &descriptor)

◆ NeonDivisionWorkloadValidate()

arm_compute::Status NeonDivisionWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 18 of file NeonDivisionWorkload.cpp.

Referenced by NeonLayerSupport::IsDivisionSupported(), and NeonBackend::OptimizeSubgraphView().

22 {
23  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
28  activationDescriptor);
29 
30  return arm_compute::NEElementwiseDivision::validate(&aclInput0,
31  &aclInput1,
32  &aclOutput,
33  activationInfo);
34 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonExpWorkloadValidate()

arm_compute::Status NeonExpWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonExpWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEExpLayer::validate(&aclInput, &aclOutput);
23 }

◆ NeonFullyConnectedWorkloadValidate()

arm_compute::Status NeonFullyConnectedWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
const FullyConnectedDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonFullyConnectedWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by NeonLayerSupport::IsFullyConnectedSupported(), and NeonBackend::OptimizeSubgraphView().

30 {
31  // The NEON implemented workload does support both const and non const
32  // weights. However, in the case of non const weights we'd have to call
33  // prepare or configure for each inference which we're not setup to do just yet.
34  if (!weights.IsConstant())
35  {
36  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
37  "Arm NN NeonFullyConnectedWorkload does not support non constant weights."};
38  }
39  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
40  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
41  arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
42  aclWeights.set_are_values_constant(weights.IsConstant());
43 
44  arm_compute::TensorInfo aclBiases;
45  arm_compute::TensorInfo* optionalAclBiases = nullptr;
46  if (descriptor.m_BiasEnabled)
47  {
48  ARMNN_ASSERT(biases.has_value());
49  // Same for bias as weights. We don't currently support non const.
50  if (!biases.value().IsConstant())
51  {
52  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
53  "Arm NN NeonFullyConnectedWorkload does not support non constant bias."};
54  }
55  aclBiases = BuildArmComputeTensorInfo(biases.value());
56  aclBiases.set_are_values_constant(biases.value().IsConstant());
57  optionalAclBiases = &aclBiases;
58  }
59 
60  const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
61  ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
62  return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
63  &aclWeights,
64  optionalAclBiases,
65  &aclOutput,
66  fullyConnectedLayerInfo);
67 }
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor &fullyConnectedDesc, const ActivationDescriptor *activationDesc)
Status
enumeration
Definition: Types.hpp:42
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ NeonGatherNdWorkloadValidate()

arm_compute::Status NeonGatherNdWorkloadValidate ( const TensorInfo paramsInfo,
const TensorInfo indicesInfo,
const TensorInfo outputInfo 
)

Validates each stage of the GatherNd decomposition in turn (Mul, ReduceSum, Gather, Reshape) and returns OK only if all of the layers are valid.

Definition at line 14 of file NeonGatherNdWorkload.cpp.

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by NeonLayerSupport::IsGatherNdSupported().

17 {
18  // Calculate ND, K, W, C.
19  std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
20 
21  /// Validate Mul
22  // Indices with shape { W, ND }
23  armnn::TensorInfo indices_W_ND_Info = indicesInfo;
24  indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
25  const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
26 
27  // Flattened coefficients with shape { ND }
28  armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
29  flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
30  const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
31 
32  // Output of Mul with shape { W, ND }
33  const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
34 
35  auto statusMul = arm_compute::NEPixelWiseMultiplication::validate(&aclIndicesInfo,
36  &aclFlattenedCoeffInfo,
37  &aclOutputMulInfo,
38  1.0f,
39  arm_compute::ConvertPolicy::WRAP,
40  arm_compute::RoundingPolicy::TO_ZERO,
41  arm_compute::ActivationLayerInfo());
42 
43  /// Validate ReduceSum
44  // Flattened indices with shape { W }
45  armnn::TensorInfo flattenedIndices_Info = indicesInfo;
46  flattenedIndices_Info.SetShape({ keyIndices["W"] });
47  const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
48 
49  const std::vector<unsigned int> armnnReduceAxes(1, 1);
50  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
51  indices_W_ND_Info.GetNumDimensions(),
52  armnnReduceAxes);
53 
54  auto statusReduceSum = arm_compute::NEReductionOperation::validate(&aclOutputMulInfo,
55  &aclFlattenedIndicesInfo,
56  static_cast<unsigned int>(coords[0]),
57  arm_compute::ReductionOperation::SUM,
58  false);
59 
60  /// Validate Gather
61  // Params with shape { K, C }
62  armnn::TensorInfo params_K_C_Info = paramsInfo;
63  params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
64  const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
65 
66  // Output of gather with shape { W, C }
67  armnn::TensorInfo outputGather_Info = outputInfo;
68  outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
69  const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
70 
71  auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
72  auto statusGather =
73  arm_compute::NEGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
74 
75  /// Validate Reshape
76  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
77 
78  auto statusReshape = arm_compute::NEReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
79 
80  /// Return OK if all the layers are valid
81  auto okCode = arm_compute::ErrorCode::OK;
82  if (statusMul.error_code() == okCode &&
83  statusReduceSum.error_code() == okCode &&
84  statusGather.error_code() == okCode &&
85  statusReshape.error_code() == okCode)
86  {
87  return arm_compute::Status(arm_compute::ErrorCode::OK,
88  "All GatherND layers validate status OK.");
89  }
90  else
91  {
92  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
93  "GatherND layer validate status failed.");
94  }
95 }
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank...
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1)
Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1) ...
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:193
Status
enumeration
Definition: Types.hpp:42
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
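
The decomposition is easiest to follow with a concrete shape example (hypothetical values, assuming the usual GatherNd semantics where the last dimension of the indices tensor selects leading dimensions of the params tensor):

    // params [5, 4, 3], indices [2, 2]  =>  ND = 2, W = 2, K = 5 * 4 = 20, C = 3
    //  - indices are viewed as { W, ND } = { 2, 2 } and flattened to { W } = { 2 } by the Mul + ReduceSum steps
    //  - params are viewed as { K, C } = { 20, 3 } and gathered into { W, C } = { 2, 3 }
    //  - the gather result is finally reshaped to the declared output shape [2, 3]
    armnn::TensorInfo params ({ 5, 4, 3 }, armnn::DataType::Float32);
    armnn::TensorInfo indices({ 2, 2 },    armnn::DataType::Signed32);
    armnn::TensorInfo output ({ 2, 3 },    armnn::DataType::Float32);

    arm_compute::Status status = armnn::NeonGatherNdWorkloadValidate(params, indices, output);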

◆ NeonGatherWorkloadValidate()

arm_compute::Status NeonGatherWorkloadValidate ( const TensorInfo input,
const TensorInfo indices,
const TensorInfo output,
const GatherDescriptor descriptor 
)

Definition at line 13 of file NeonGatherWorkload.cpp.

Referenced by NeonLayerSupport::IsGatherSupported().

17 {
18  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
20  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
21 
22  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
23 
24  return arm_compute::NEGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
25 }
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank...

◆ NeonInstanceNormalizationWorkloadValidate()

arm_compute::Status NeonInstanceNormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const InstanceNormalizationDescriptor descriptor 
)

Definition at line 19 of file NeonInstanceNormalizationWorkload.cpp.

Referenced by NeonLayerSupport::IsInstanceNormalizationSupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  return arm_compute::NEInstanceNormalizationLayer::validate(&aclInputInfo,
27  &aclOutputInfo,
28  descriptor.m_Gamma,
29  descriptor.m_Beta,
30  descriptor.m_Eps);
31 }

◆ NeonL2NormalizationWorkloadValidate()

arm_compute::Status NeonL2NormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const L2NormalizationDescriptor descriptor 
)

Definition at line 19 of file NeonL2NormalizationFloatWorkload.cpp.

Referenced by NeonLayerSupport::IsL2NormalizationSupported().

22 {
23  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
27 
28  return arm_compute::NEL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
29 }

◆ NeonLogicalAndWorkloadValidate()

arm_compute::Status NeonLogicalAndWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 18 of file NeonLogicalAndWorkload.cpp.

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
23  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::NELogicalAnd::validate(&aclInputInfo0,
27  &aclInputInfo1,
28  &aclOutputInfo);
29  return aclStatus;
30 }
Status
enumeration
Definition: Types.hpp:42

◆ NeonLogicalNotWorkloadValidate()

arm_compute::Status NeonLogicalNotWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file NeonLogicalNotWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
24 
25  const arm_compute::Status aclStatus = arm_compute::NELogicalNot::validate(&aclInputInfo,
26  &aclOutputInfo);
27  return aclStatus;
28 }
Status
enumeration
Definition: Types.hpp:42

◆ NeonLogicalOrWorkloadValidate()

arm_compute::Status NeonLogicalOrWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 18 of file NeonLogicalOrWorkload.cpp.

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
23  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::NELogicalOr::validate(&aclInputInfo0,
27  &aclInputInfo1,
28  &aclOutputInfo);
29  return aclStatus;
30 }
Status
enumeration
Definition: Types.hpp:42

◆ NeonLogSoftmaxWorkloadValidate()

arm_compute::Status NeonLogSoftmaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const LogSoftmaxDescriptor descriptor 
)

Definition at line 19 of file NeonLogSoftmaxWorkload.cpp.

Referenced by NeonLayerSupport::IsLogSoftmaxSupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
27  return arm_compute::NELogSoftmaxLayer::validate(&aclInputInfo,
28  &aclOutputInfo,
29  descriptor.m_Beta,
30  aclAxis);
31 }
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank...

◆ NeonLogWorkloadValidate()

arm_compute::Status NeonLogWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonLogWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NELogLayer::validate(&aclInput, &aclOutput);
23 }

◆ NeonLstmFloatWorkloadValidate()

arm_compute::Status NeonLstmFloatWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo scratchBuffer,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const LstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 253 of file NeonLstmFloatWorkload.cpp.

Referenced by NeonLayerSupport::IsLstmSupported().

262 {
263  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
264 
265  // The inputs and outputs
266  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
267  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
268  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
269  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
270  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
271  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
272  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
273 
274  // Basic parameters
275  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
276  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
277  const arm_compute::TensorInfo aclInputToCellWeightsInfo
278  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
279  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
280  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
281  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
282  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
283  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
284  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
285  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
286  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
287  const arm_compute::TensorInfo aclForgetGateBiasInfo
288  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
289  const arm_compute::TensorInfo aclCellBiasInfo
290  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
291  const arm_compute::TensorInfo aclOutputGateBiasInfo
292  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
293 
294  arm_compute::TensorInfo aclInputToInputWeightsInfo;
295  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
296  arm_compute::TensorInfo aclCellToInputWeightsInfo;
297  arm_compute::TensorInfo aclInputGateBiasInfo;
298  arm_compute::TensorInfo aclProjectionWeightsInfo;
299  arm_compute::TensorInfo aclProjectionBiasInfo;
300  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
301  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
302 
303  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
304  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
305  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
306  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
307 
308 
309  if (!descriptor.m_CifgEnabled)
310  {
311  if (descriptor.m_PeepholeEnabled)
312  {
313  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
314  }
315  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
316  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
317  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
318 
319  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
320  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
321  &aclInputGateBiasInfo);
322  }
323 
324  if (descriptor.m_ProjectionEnabled)
325  {
326  if (paramsInfo.m_ProjectionBias != nullptr)
327  {
328  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
329  }
330  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
331 
332  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
333  paramsInfo.m_ProjectionBias != nullptr ?
334  &aclProjectionBiasInfo : nullptr);
335  }
336 
337  if (descriptor.m_PeepholeEnabled)
338  {
339  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
340  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
341 
342  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
343  }
344 
345  if (descriptor.m_LayerNormEnabled)
346  {
347  if (!descriptor.m_CifgEnabled)
348  {
349  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
350  }
351  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
352  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
353  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
354 
355  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
356  nullptr : &aclInputLayerNormWeightsInfo,
357  &aclForgetLayerNormWeightsInfo,
358  &aclCellLayerNormWeightsInfo,
359  &aclOutputLayerNormWeightsInfo);
360  }
361 
362  float cell_threshold = descriptor.m_ClippingThresCell;
363  float projection_threshold = descriptor.m_ClippingThresProj;
364 
365  // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations
366  arm_compute::ActivationLayerInfo activationLayerInfo =
367  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
368 
369  return arm_compute::NELSTMLayer::validate(&aclInputInfo,
370  &aclInputToForgetWeightsInfo,
371  &aclInputToCellWeightsInfo,
372  &aclInputToOutputWeightsInfo,
373  &aclRecurrentToForgetWeightsInfo,
374  &aclRecurrentToCellWeightsInfo,
375  &aclRecurrentToOutputWeightsInfo,
376  &aclForgetGateBiasInfo,
377  &aclCellBiasInfo,
378  &aclOutputGateBiasInfo,
379  &aclOutputStateInInfo,
380  &aclCellStateInInfo,
381  &aclScratchBufferInfo,
382  &aclOutputStateOutInfo,
383  &aclCellStateOutInfo,
384  &aclOutputInfo,
385  lstm_params_info,
386  activationLayerInfo,
387  cell_threshold,
388  projection_threshold);
389 }

◆ NeonMaximumWorkloadValidate()

arm_compute::Status NeonMaximumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 14 of file NeonMaximumWorkload.cpp.

Referenced by NeonLayerSupport::IsMaximumSupported().

17 {
18  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
19  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEElementwiseMax::validate(&aclInput0,
23  &aclInput1,
24  &aclOutput);
25 }
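
A minimal usage sketch, not part of the ArmNN sources: the Neon validate helpers are typically given armnn::TensorInfo objects before a workload is created, and the returned arm_compute::Status is checked against arm_compute::ErrorCode::OK. The include paths below are assumptions; these are internal backend headers whose location depends on the build tree.

// Sketch only; header locations are assumed, not part of the public API.
#include <armnn/Tensor.hpp>
#include <neon/workloads/NeonMaximumWorkload.hpp>

bool IsElementwiseMaxSupportedOnNeon()
{
    const armnn::TensorInfo input0({2, 4}, armnn::DataType::Float32);
    const armnn::TensorInfo input1({2, 4}, armnn::DataType::Float32);
    const armnn::TensorInfo output({2, 4}, armnn::DataType::Float32);

    // The helper converts each armnn::TensorInfo to an arm_compute::TensorInfo
    // and defers to NEElementwiseMax::validate.
    const arm_compute::Status status = armnn::NeonMaximumWorkloadValidate(input0, input1, output);
    return status.error_code() == arm_compute::ErrorCode::OK;
}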

◆ NeonMeanWorkloadValidate()

arm_compute::Status NeonMeanWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const MeanDescriptor &  descriptor 
)

Definition at line 18 of file NeonMeanWorkload.cpp.

Referenced by NeonLayerSupport::IsMeanSupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
26  input.GetNumDimensions(),
27  descriptor.m_Axis);
28 
29  return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
30 }
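
A sketch of how this validator might be exercised, assuming the includes from the NeonMaximumWorkloadValidate sketch above plus <armnn/Descriptors.hpp> and the corresponding workload header:

// Reduce over H and W of an NCHW tensor, keeping the reduced dimensions.
armnn::MeanDescriptor descriptor;
descriptor.m_Axis     = {2, 3};
descriptor.m_KeepDims = true;

const armnn::TensorInfo input({1, 3, 4, 4}, armnn::DataType::Float32);
const armnn::TensorInfo output({1, 3, 1, 1}, armnn::DataType::Float32);

// BuildArmComputeReductionCoordinates translates the ArmNN axes into the
// reversed ACL axis order before NEReduceMean::validate is called.
arm_compute::Status status = armnn::NeonMeanWorkloadValidate(input, output, descriptor);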

◆ NeonMinimumWorkloadValidate()

arm_compute::Status NeonMinimumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Validate function for validating the inputs and output.

Parameters
    [in]  input0   The input0 value to be validated.
    [in]  input1   The input1 value to be validated.
    [in]  output   The output value to be validated.

Definition at line 15 of file NeonMinimumWorkload.cpp.

Referenced by NeonLayerSupport::IsMinimumSupported().

18 {
19  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
20  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEElementwiseMin::validate(&aclInput0,
24  &aclInput1,
25  &aclOutput);
26 }

◆ NeonMultiplicationWorkloadValidate()

arm_compute::Status NeonMultiplicationWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file NeonMultiplicationWorkload.cpp.

Referenced by NeonLayerSupport::IsMultiplicationSupported(), and NeonBackend::OptimizeSubgraphView().

23 {
24  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
29  arm_compute::ConvertPolicy::SATURATE :
30  arm_compute::ConvertPolicy::WRAP;
31 
32  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
33  activationDescriptor);
34 
35  // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
36  // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
37  // ignored for F32 tensors.
38  return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1,
39  &aclInput2,
40  &aclOutput,
41  1.0f,
42  convertPolicy,
43  arm_compute::RoundingPolicy::TO_ZERO,
44  activationInfo);
45 }
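
A sketch of validating a multiplication with a fused activation, assuming (as in the signature above) that the activation descriptor is passed by pointer and may be nullptr when nothing is fused:

armnn::ActivationDescriptor boundedRelu;
boundedRelu.m_Function = armnn::ActivationFunction::BoundedReLu;
boundedRelu.m_A = 6.0f;   // upper bound of the fused activation
boundedRelu.m_B = 0.0f;   // lower bound

const armnn::TensorInfo info({1, 8}, armnn::DataType::Float32);

// Pass nullptr instead of &boundedRelu when no activation is fused.
arm_compute::Status status = armnn::NeonMultiplicationWorkloadValidate(info, info, info, &boundedRelu);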

◆ NeonNegWorkloadValidate()

arm_compute::Status NeonNegWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonNegWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NENegLayer::validate(&aclInput, &aclOutput);
23 }

◆ NeonNormalizationWorkloadValidate()

arm_compute::Status NeonNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const NormalizationDescriptor &  descriptor 
)

Definition at line 49 of file NeonNormalizationFloatWorkload.cpp.

Referenced by NeonLayerSupport::IsNormalizationSupported().

52 {
53  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
54  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
55 
56  arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
57 
58  return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
59 }

◆ NeonPadWorkloadValidate()

arm_compute::Status NeonPadWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PadDescriptor &  descriptor 
)

Definition at line 59 of file NeonPadWorkload.cpp.

Referenced by NeonLayerSupport::IsPadSupported().

62 {
63  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
64  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
65 
66  std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
67 
68  std::reverse_copy(std::begin(descriptor.m_PadList),
69  std::end(descriptor.m_PadList),
70  std::begin(reversed_PadList));
71 
72  arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
73 
74  // PixelValue is currently unused when validating, but it's required to pass in PaddingMode.
75  arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
76  return arm_compute::NEPadLayer::validate(&aclInputInfo,
77  &aclOutputInfo,
78  padList,
79  pixelValue,
80  ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
81 }
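
A sketch showing how an armnn::PadDescriptor maps onto this validator; the function reverse-copies m_PadList because ArmNN orders the {before, after} pairs from the outermost dimension while the ACL PaddingList expects the innermost first:

armnn::PadDescriptor descriptor;
descriptor.m_PadList  = {{0, 0}, {0, 0}, {1, 1}, {2, 2}};  // per-dimension padding, outermost first
descriptor.m_PadValue = 0.0f;

const armnn::TensorInfo input({1, 3, 8, 8}, armnn::DataType::Float32);
const armnn::TensorInfo output({1, 3, 10, 12}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonPadWorkloadValidate(input, output, descriptor);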

◆ NeonPermuteWorkloadValidate()

arm_compute::Status NeonPermuteWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PermuteDescriptor &  descriptor 
)

Definition at line 15 of file NeonPermuteWorkload.cpp.

Referenced by NeonLayerSupport::IsPermuteSupported().

18 {
19  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
22 
23  return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
24  armcomputetensorutils::BuildArmComputePermutationVector(mappings));
25 }
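
A sketch of a permute validation. Element i of the armnn::PermutationVector gives the destination dimension of source dimension i, so {0, 3, 1, 2} maps NCHW data to NHWC:

armnn::PermuteDescriptor descriptor(armnn::PermutationVector({0, 3, 1, 2}));

const armnn::TensorInfo input({1, 3, 8, 8}, armnn::DataType::Float32);   // NCHW
const armnn::TensorInfo output({1, 8, 8, 3}, armnn::DataType::Float32);  // NHWC

arm_compute::Status status = armnn::NeonPermuteWorkloadValidate(input, output, descriptor);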

◆ NeonPooling2dWorkloadValidate()

arm_compute::Status NeonPooling2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 22 of file NeonPooling2dWorkload.cpp.

Referenced by NeonLayerSupport::IsPooling2dSupported().

25 {
26  const arm_compute::TensorInfo aclInputInfo =
27  BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
28  const arm_compute::TensorInfo aclOutputInfo =
29  BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
30 
31  arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
32 
33  return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
34 }

◆ NeonPooling3dWorkloadValidate()

arm_compute::Status NeonPooling3dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling3dDescriptor &  descriptor 
)

Definition at line 15 of file NeonPooling3dWorkload.cpp.

Referenced by NeonLayerSupport::IsPooling3dSupported().

18  {
19  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
20  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
21  arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
22  return arm_compute::NEPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
23  }

◆ NeonPreluWorkloadValidate()

arm_compute::Status NeonPreluWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  alpha,
const TensorInfo &  output 
)

Definition at line 17 of file NeonPreluWorkload.cpp.

Referenced by NeonLayerSupport::IsPreluSupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  return arm_compute::NEPReluLayer::validate(&aclInput,
26  &aclAlpha,
27  &aclOutput);
28 }

◆ NeonQLstmWorkloadValidate()

arm_compute::Status NeonQLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateOut,
const TensorInfo &  outputStateOut,
const TensorInfo &  output,
const QLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 243 of file NeonQLstmWorkload.cpp.

Referenced by NeonLayerSupport::IsQLstmSupported().

251 {
252  arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
253 
254  // Input/Output tensor info
255  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
256  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
257  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
258 
259  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
260  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
261  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
262 
263  // Mandatory tensor info
264  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
265  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
266  const arm_compute::TensorInfo aclInputToCellWeightsInfo
267  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
268  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
270  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
272  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
274  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
275  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
276  const arm_compute::TensorInfo aclForgetGateBiasInfo
277  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
278  const arm_compute::TensorInfo aclCellBiasInfo
279  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
280  const arm_compute::TensorInfo aclOutputGateBiasInfo
281  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
282 
283  // Optional tensor info
284  arm_compute::TensorInfo aclInputToInputWeightsInfo;
285  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
286 
287  arm_compute::TensorInfo aclCellToInputWeightsInfo;
288  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
289  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
290 
291  arm_compute::TensorInfo aclInputGateBiasInfo;
292 
293  arm_compute::TensorInfo aclProjectionWeightsInfo;
294  arm_compute::TensorInfo aclProjectionBiasInfo;
295 
296  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
297  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
298  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
299  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
300 
301  // Create tensor info for optional params if they are enabled
302  if (descriptor.m_PeepholeEnabled)
303  {
304  if (!descriptor.m_CifgEnabled)
305  {
306  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
307  }
308 
309  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
310  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
311 
312  // Set peephole params info
313  aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
314  &aclCellToOutputWeightsInfo);
315  }
316 
317  if (descriptor.m_ProjectionEnabled)
318  {
319  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
320 
321  if (paramsInfo.m_ProjectionBias != nullptr)
322  {
323  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
324  }
325 
326  // Set projection params info
327  aclParamsInfo.set_projection_params(
328  &aclProjectionWeightsInfo,
329  paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
330  }
331 
332  if (descriptor.m_LayerNormEnabled)
333  {
334  if (!descriptor.m_CifgEnabled)
335  {
336  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
337  }
338 
339  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
340  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
341  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
342 
343  // Set layer norm params info
344  aclParamsInfo.set_layer_normalization_params(
345  paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
346  &aclForgetLayerNormWeightsInfo,
347  &aclCellLayerNormWeightsInfo,
348  &aclOutputLayerNormWeightsInfo);
349  }
350 
351  if (!descriptor.m_CifgEnabled)
352  {
353  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
354  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
355  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
356 
357  // Set CIFG params info
358  aclParamsInfo.set_cifg_params(
359  &aclInputToInputWeightsInfo,
360  &aclRecurrentToInputWeightsInfo,
361  paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
362  &aclInputGateBiasInfo);
363  }
364 
365  // Set scalar descriptor params
366  aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
367  aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
368  aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
369  aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
370  descriptor.m_ForgetIntermediateScale,
371  descriptor.m_CellIntermediateScale,
372  descriptor.m_OutputIntermediateScale);
373 
374  // QLSTM NEON validate
375  return arm_compute::NEQLSTMLayer::validate(&aclInputInfo,
376  &aclInputToForgetWeightsInfo,
377  &aclInputToCellWeightsInfo,
378  &aclInputToOutputWeightsInfo,
379  &aclRecurrentToForgetWeightsInfo,
380  &aclRecurrentToCellWeightsInfo,
381  &aclRecurrentToOutputWeightsInfo,
382  &aclForgetGateBiasInfo,
383  &aclCellBiasInfo,
384  &aclOutputGateBiasInfo,
385  &aclCellStateInInfo,
386  &aclOutputStateInInfo,
387  &aclCellStateOutInfo,
388  &aclOutputStateOutInfo,
389  &aclOutputInfo,
390  aclParamsInfo);
391 }

◆ NeonQuantizedLstmWorkloadValidate()

arm_compute::Status NeonQuantizedLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateOut,
const TensorInfo &  outputStateOut,
const QuantizedLstmInputParamsInfo &  paramsInfo 
)

Definition at line 131 of file NeonQuantizedLstmWorkload.cpp.

Referenced by NeonLayerSupport::IsQuantizedLstmSupported().

137 {
138  // The inputs and outputs
139  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
140  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
141  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
142  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
143  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
144 
145  // Basic parameters
146  const arm_compute::TensorInfo aclInputToInputWeightsInfo
147  = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
148  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
149  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
150  const arm_compute::TensorInfo aclInputToCellWeightsInfo
151  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
152  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
153  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
154 
155  const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
156  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
157  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
158  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
159  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
160  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
161  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
162  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
163 
164  const arm_compute::TensorInfo aclInputGateBiasInfo
165  = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
166  const arm_compute::TensorInfo aclForgetGateBiasInfo
167  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
168  const arm_compute::TensorInfo aclCellBiasInfo
169  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
170  const arm_compute::TensorInfo aclOutputGateBiasInfo
171  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
172 
173  return arm_compute::NELSTMLayerQuantized::validate(&aclInputInfo,
174  &aclInputToInputWeightsInfo,
175  &aclInputToForgetWeightsInfo,
176  &aclInputToCellWeightsInfo,
177  &aclInputToOutputWeightsInfo,
178  &aclRecurrentToInputWeightsInfo,
179  &aclRecurrentToForgetWeightsInfo,
180  &aclRecurrentToCellWeightsInfo,
181  &aclRecurrentToOutputWeightsInfo,
182  &aclInputGateBiasInfo,
183  &aclForgetGateBiasInfo,
184  &aclCellBiasInfo,
185  &aclOutputGateBiasInfo,
186  &aclCellStateInInfo,
187  &aclOutputStateInInfo,
188  &aclCellStateOutInfo,
189  &aclOutputStateOutInfo);
190 }

◆ NeonQuantizeWorkloadValidate()

arm_compute::Status NeonQuantizeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file NeonQuantizeWorkload.cpp.

Referenced by NeonLayerSupport::IsQuantizeSupported().

19 {
20  const arm_compute::TensorInfo neonInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEQuantizationLayer::validate(&neonInputInfo, &neonOutputInfo);
24 }

◆ NeonReduceWorkloadValidate()

arm_compute::Status NeonReduceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ReduceDescriptor &  descriptor 
)

Definition at line 19 of file NeonReduceWorkload.cpp.

References ReduceDescriptor::m_vAxis.

Referenced by NeonLayerSupport::IsReduceSupported().

22 {
23  if ( descriptor.m_vAxis.size()==1 || descriptor.m_vAxis.empty())
24  {
25  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
29  input.GetNumDimensions(),
30  descriptor.m_vAxis);
31 
32  return arm_compute::NEReductionOperation::validate(&aclInputInfo,
33  &aclOutputInfo,
34  static_cast<unsigned int>(coords[0]),
35  ConvertReductionOperationToAcl(descriptor),
36  descriptor.m_KeepDims);
37  }
38  else
39  {
40  // Validate layer if there are multiple axes.
41  arm_compute::Status status;
42  IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, descriptor, status);
43  return status;
44  }
45 }
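
A sketch for the single-axis path; with more than one axis the function re-validates itself per axis through the IS_MULTI_AXES_REDUCE_SUPPORTED macro instead of calling NEReductionOperation directly:

armnn::ReduceDescriptor descriptor;
descriptor.m_vAxis           = {1};    // single reduction axis
descriptor.m_KeepDims        = false;
descriptor.m_ReduceOperation = armnn::ReduceOperation::Sum;

const armnn::TensorInfo input({2, 3, 4}, armnn::DataType::Float32);
const armnn::TensorInfo output({2, 4}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonReduceWorkloadValidate(input, output, descriptor);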

◆ NeonReshapeWorkloadValidate()

arm_compute::Status NeonReshapeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonReshapeWorkload.cpp.

Referenced by NeonLayerSupport::IsReshapeSupported().

19 {
20  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
24 }

◆ NeonResizeWorkloadValidate()

arm_compute::Status NeonResizeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ResizeDescriptor &  descriptor 
)

Definition at line 22 of file NeonResizeWorkload.cpp.

Referenced by NeonLayerSupport::IsResizeSupported().

25 {
26  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
27  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
28 
29  arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
30  aclInputInfo.set_data_layout(aclDataLayout);
31  aclOutputInfo.set_data_layout(aclDataLayout);
32 
33  arm_compute::InterpolationPolicy aclInterpolationPolicy =
34  ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
35 
36  arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
37  arm_compute::SamplingPolicy::TOP_LEFT;
38 
39  bool usePadding = false;
40 
41  return arm_compute::NEScale::validate(&aclInputInfo,
42  &aclOutputInfo,
43  arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
44  arm_compute::BorderMode::REPLICATE,
45  arm_compute::PixelValue(0.f),
46  samplingPolicy,
47  usePadding,
48  descriptor.m_AlignCorners));
49 
50 }
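
A sketch of a bilinear upscale validation; m_HalfPixelCenters selects SamplingPolicy::CENTER in the code above, otherwise TOP_LEFT is used:

armnn::ResizeDescriptor descriptor;
descriptor.m_Method           = armnn::ResizeMethod::Bilinear;
descriptor.m_TargetHeight     = 16;
descriptor.m_TargetWidth      = 16;
descriptor.m_DataLayout       = armnn::DataLayout::NHWC;
descriptor.m_HalfPixelCenters = true;

const armnn::TensorInfo input({1, 8, 8, 3}, armnn::DataType::Float32);
const armnn::TensorInfo output({1, 16, 16, 3}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonResizeWorkloadValidate(input, output, descriptor);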

◆ NeonRsqrtWorkloadValidate()

arm_compute::Status NeonRsqrtWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file NeonRsqrtWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NERsqrtLayer::validate(&aclInput, &aclOutput);
24 }

◆ NeonSinWorkloadValidate()

arm_compute::Status NeonSinWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonSinWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NESinLayer::validate(&aclInput, &aclOutput);
23 }

◆ NeonSliceWorkloadValidate()

arm_compute::Status NeonSliceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SliceDescriptor &  descriptor 
)

Definition at line 21 of file NeonSliceWorkload.cpp.

Referenced by NeonLayerSupport::IsSliceSupported().

24 {
25  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  arm_compute::Coordinates starts;
29  arm_compute::Coordinates ends;
30 
31  std::tie(starts, ends) = SetNeonSliceData(descriptor.m_Begin, descriptor.m_Size);
32 
33  return arm_compute::NESlice::validate(&aclInputInfo, &aclOutputInfo, starts, ends);
34 }

◆ NeonSoftmaxWorkloadValidate()

arm_compute::Status NeonSoftmaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SoftmaxDescriptor &  descriptor 
)

Definition at line 19 of file NeonSoftmaxWorkload.cpp.

Referenced by NeonLayerSupport::IsSoftmaxSupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
27  return arm_compute::NESoftmaxLayer::validate(&aclInputInfo,
28  &aclOutputInfo,
29  descriptor.m_Beta,
30  aclAxis);
31 }
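
A sketch; ComputeAclAxis converts the ArmNN axis (counted left to right, negative values allowed) to the ACL convention before NESoftmaxLayer::validate is called:

armnn::SoftmaxDescriptor descriptor;
descriptor.m_Beta = 1.0f;
descriptor.m_Axis = -1;   // innermost dimension

const armnn::TensorInfo input({2, 10}, armnn::DataType::Float32);
const armnn::TensorInfo output({2, 10}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonSoftmaxWorkloadValidate(input, output, descriptor);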

◆ NeonSpaceToBatchNdWorkloadValidate()

arm_compute::Status NeonSpaceToBatchNdWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SpaceToBatchNdDescriptor &  descriptor 
)

Definition at line 20 of file NeonSpaceToBatchNdWorkload.cpp.

Referenced by NeonLayerSupport::IsSpaceToBatchNdSupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
25  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
26 
27  // ArmNN blockShape is [H, W] Cl asks for W, H
28  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
29  int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
30 
31  arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(
32  descriptor.m_PadList[1].first, descriptor.m_PadList[0].first);
33  arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(
34  descriptor.m_PadList[1].second, descriptor.m_PadList[0].second);
35 
36  return arm_compute::NESpaceToBatchLayer::validate(&aclInputInfo,
37  blockWidth,
38  blockHeight,
39  paddingLeftTop,
40  paddingRightBottom,
41  &aclOutputInfo);
42 }
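
A sketch; as the comment in the code notes, ArmNN stores the block shape as [H, W] while ACL takes width first, and this validator performs that swap:

armnn::SpaceToBatchNdDescriptor descriptor;
descriptor.m_BlockShape = {2, 2};            // [blockHeight, blockWidth]
descriptor.m_PadList    = {{0, 0}, {0, 0}};  // {before, after} padding for H and W
descriptor.m_DataLayout = armnn::DataLayout::NHWC;

const armnn::TensorInfo input({1, 4, 4, 1}, armnn::DataType::Float32);
const armnn::TensorInfo output({4, 2, 2, 1}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonSpaceToBatchNdWorkloadValidate(input, output, descriptor);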

◆ NeonSpaceToDepthWorkloadValidate()

arm_compute::Status NeonSpaceToDepthWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SpaceToDepthDescriptor &  descriptor 
)

Definition at line 19 of file NeonSpaceToDepthWorkload.cpp.

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsSpaceToDepthSupported().

22 {
23  DataLayout dataLayout = descriptor.m_DataLayout;
24  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
25  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
26 
27  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
28 
29  return arm_compute::NESpaceToDepthLayer::validate(&aclInput, &aclOutput, blockSize);
30 }

◆ NeonSplitterWorkloadValidate()

arm_compute::Status NeonSplitterWorkloadValidate ( const TensorInfo &  input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
unsigned int  splitAxis 
)

Definition at line 32 of file NeonSplitterWorkload.cpp.

Referenced by NeonLayerSupport::IsSplitterSupported().

35 {
36  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
37 
38  size_t numOutputs = outputs.size();
39 
40  std::vector<arm_compute::TensorInfo> aclOutputs;
41  aclOutputs.reserve(numOutputs);
42 
43  std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
44  aclOutputPtr.reserve(numOutputs);
45 
46  for (size_t i = 0u; i < outputs.size(); ++i)
47  {
48  aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
49  aclOutputPtr.emplace_back(&aclOutputs.back());
50  }
51 
52  unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
53  return arm_compute::NESplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
54 }
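
A sketch of splitting a 2D tensor into two equal outputs along axis 0; the outputs are passed as non-const references to match the std::reference_wrapper signature (assumes <vector> and <functional> in addition to the includes of the first sketch):

const armnn::TensorInfo input({4, 8}, armnn::DataType::Float32);
armnn::TensorInfo output0({2, 8}, armnn::DataType::Float32);
armnn::TensorInfo output1({2, 8}, armnn::DataType::Float32);

std::vector<std::reference_wrapper<armnn::TensorInfo>> outputs = {output0, output1};

// splitAxis 0 is converted to an ACL axis by CalcAclAxis before NESplit::validate runs.
arm_compute::Status status = armnn::NeonSplitterWorkloadValidate(input, outputs, 0);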

◆ NeonSqrtWorkloadValidate()

arm_compute::Status NeonSqrtWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file NeonSqrtWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  ActivationDescriptor descriptor;
24  descriptor.m_Function = ActivationFunction::Sqrt;
25  const arm_compute::ActivationLayerInfo activationLayerInfo =
26  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
27 
28  return arm_compute::NEActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
29 }

◆ NeonStackWorkloadValidate()

arm_compute::Status NeonStackWorkloadValidate ( const std::vector< const TensorInfo *> &  inputs,
const TensorInfo &  output,
const StackDescriptor &  descriptor 
)

Definition at line 27 of file NeonStackWorkload.cpp.

Referenced by NeonLayerSupport::IsStackSupported().

30 {
31  std::vector<arm_compute::TensorInfo> aclInputs;
32  for (const TensorInfo* input : inputs)
33  {
34  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
35  aclInputs.emplace_back(aclInputInfo);
36  }
37 
38  std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
39  for (arm_compute::ITensorInfo& input : aclInputs)
40  {
41  aclInputPtrs.emplace_back(&input);
42  }
43 
44  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
45  int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
46  return arm_compute::NEStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
47 }

◆ NeonStridedSliceWorkloadValidate()

arm_compute::Status NeonStridedSliceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const StridedSliceDescriptor &  descriptor 
)

Definition at line 19 of file NeonStridedSliceWorkload.cpp.

Referenced by NeonLayerSupport::IsStridedSliceSupported().

22 {
23  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  arm_compute::Coordinates starts;
27  arm_compute::Coordinates ends;
28  arm_compute::Coordinates strides;
29 
30  std::tie(starts, ends, strides) = SetNeonStridedSliceData(descriptor.m_Begin,
31  descriptor.m_End,
32  descriptor.m_Stride);
33 
34  auto numDimensions = armnn::numeric_cast<int>(input.GetNumDimensions());
35  int32_t begin_mask = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
36  int32_t end_mask = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
37  int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
38 
39  return arm_compute::NEStridedSlice::validate(&aclInput,
40  &aclOutput,
41  starts,
42  ends,
43  strides,
44  begin_mask,
45  end_mask,
46  shrink_axis_mask);
47 }
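
A sketch of a strided-slice validation; the begin/end/stride vectors use the signed ArmNN convention and are converted by SetNeonStridedSliceData, while the masks are re-packed by ConvertMaskToACLFormat:

armnn::StridedSliceDescriptor descriptor;
descriptor.m_Begin          = {0, 0};
descriptor.m_End            = {2, 3};
descriptor.m_Stride         = {1, 1};
descriptor.m_BeginMask      = 0;
descriptor.m_EndMask        = 0;
descriptor.m_ShrinkAxisMask = 0;

const armnn::TensorInfo input({4, 6}, armnn::DataType::Float32);
const armnn::TensorInfo output({2, 3}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonStridedSliceWorkloadValidate(input, output, descriptor);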

◆ NeonSubtractionWorkloadValidate()

arm_compute::Status NeonSubtractionWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 22 of file NeonSubtractionWorkload.cpp.

Referenced by NeonLayerSupport::IsSubtractionSupported(), and NeonBackend::OptimizeSubgraphView().

26 {
27  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
28  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
29  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
30 
31  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
32  activationDescriptor);
33 
34  return arm_compute::NEArithmeticSubtraction::validate(&aclInput0,
35  &aclInput1,
36  &aclOutput,
37  arm_compute::ConvertPolicy::SATURATE,
38  activationInfo);
39 }

◆ NeonTensorHandleFactoryId()

constexpr const char* armnn::NeonTensorHandleFactoryId ( )

Definition at line 14 of file NeonTensorHandleFactory.hpp.

Referenced by NeonTensorHandleFactory::GetIdStatic().

14 { return "Arm/Neon/TensorHandleFactory"; }

◆ NeonTransposeConvolution2dWorkloadValidate()

arm_compute::Status NeonTransposeConvolution2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TransposeConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 25 of file NeonTransposeConvolution2dWorkload.cpp.

Referenced by NeonLayerSupport::IsTransposeConvolution2dSupported().

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34 
35  arm_compute::TensorInfo aclBiasesInfo;
36  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
37 
38  if (descriptor.m_BiasEnabled)
39  {
40  ARMNN_ASSERT(biases.has_value());
41 
42  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
43  optionalAclBiasesInfo = &aclBiasesInfo;
44  }
45 
46  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
47 
48  return arm_compute::NEDeconvolutionLayer::validate(&aclInputInfo,
49  &aclWeightsInfo,
50  optionalAclBiasesInfo,
51  &aclOutputInfo,
52  layerInfo);
53 }

◆ NeonTransposeWorkloadValidate()

arm_compute::Status NeonTransposeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TransposeDescriptor &  descriptor 
)

Definition at line 15 of file NeonTransposeWorkload.cpp.

Referenced by NeonLayerSupport::IsTransposeSupported().

18 {
19  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
22 
23  return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
24  armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
25 }

◆ NeonUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const UnidirectionalSequenceLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 510 of file NeonUnidirectionalSequenceLstmFloatWorkload.cpp.

References TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

518 {
519  TensorShape inputLayerShape = input.GetShape();
520  TensorShape outputLayerShape = outputStateIn.GetShape();
521 
522  unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
523  unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
524  unsigned int inputSize = inputLayerShape[2];
525  unsigned int outputSize = outputLayerShape[2];
526 
527  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
528  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
529 
530  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
531  "Permute1 status");
532  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
533  "Split status");
534  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
535  "LSTM status");
536  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
537  "Concat status");
538  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
539  "Permute2 status");
540 
541  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
542  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
543 
544  //
545  // Permute validate
546  //
547  TensorInfo permuteOutInfo = TensorInfo(input);
548  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
549  if (!descriptor.m_TimeMajor)
550  {
551  statusPermute1 = arm_compute::NEPermute::validate(&aclInputInfo,
552  &aclPermuteOutInfo,
553  arm_compute::PermutationVector(0U, 2U, 1U));
554  }
555 
556  //
557  // Split and Concat Tensors validate
558  //
559  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
560  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
561  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
562  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
563  splitterOutputsTensorInfos.reserve(maxTime);
564  concatInputsTensorInfos.reserve(maxTime);
565  for (unsigned int i = 0; i < maxTime; ++i)
566  {
567  arm_compute::TensorInfo splitter_out;
568  arm_compute::TensorInfo concat_in;
569 
570  auto splitterTensorInfo = TensorInfo(input);
571  auto concatTensorInfo = TensorInfo(output);
572  splitterTensorInfo.SetShape({batchSize, inputSize});
573  concatTensorInfo.SetShape({batchSize, outputSize});
574 
575  arm_compute::TensorInfo aclSplitterTensorInfo
576  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
577  arm_compute::TensorInfo aclConcatTensorInfo
578  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
579 
580  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
581  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
582  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
583  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
584  }
585 
586  //
587  // Split validate
588  //
589  unsigned int numberDimensions = 3;
590  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
591  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
592 
593  if (maxTime != 1) // ACL split does not work with only one element to split.
594  {
595  if (!descriptor.m_TimeMajor)
596  {
597  statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
598  splitterOutputsTensorInfosPtr,
599  aclAxisSplit);
600  } else
601  {
602  statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
603  }
604  }
605 
606  //
607  // LSTM validate
608  //
609 
610  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
611 
612  const TensorInfo& scratchBuffer = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
613 
614  // The inputs and outputs
615  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
616  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
617  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
618  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
619  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
620 
621  // Basic parameters
622  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
623  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
624  const arm_compute::TensorInfo aclInputToCellWeightsInfo
625  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
626  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
627  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
628  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
629  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
630  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
631  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
632  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
633  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
634  const arm_compute::TensorInfo aclForgetGateBiasInfo
635  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
636  const arm_compute::TensorInfo aclCellBiasInfo
637  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
638  const arm_compute::TensorInfo aclOutputGateBiasInfo
639  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
640 
641  arm_compute::TensorInfo aclInputToInputWeightsInfo;
642  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
643  arm_compute::TensorInfo aclCellToInputWeightsInfo;
644  arm_compute::TensorInfo aclInputGateBiasInfo;
645  arm_compute::TensorInfo aclProjectionWeightsInfo;
646  arm_compute::TensorInfo aclProjectionBiasInfo;
647  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
648  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
649 
650  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
651  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
652  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
653  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
654 
655 
656  if (!descriptor.m_CifgEnabled)
657  {
658  if (descriptor.m_PeepholeEnabled)
659  {
660  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
661  }
662  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
663  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
664  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
665 
666  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
667  &aclRecurrentToInputWeightsInfo,
668  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
669  &aclInputGateBiasInfo);
670  }
671 
672  if (descriptor.m_ProjectionEnabled)
673  {
674  if (paramsInfo.m_ProjectionBias != nullptr)
675  {
676  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
677  }
678  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
679 
680  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
681  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
682  }
683 
684  if (descriptor.m_PeepholeEnabled)
685  {
686  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
687  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
688 
689  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
690  }
691 
692  if (descriptor.m_LayerNormEnabled)
693  {
694  if (!descriptor.m_CifgEnabled)
695  {
696  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
697  }
698  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
699  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
700  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
701 
702  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
703  &aclInputLayerNormWeightsInfo,
704  &aclForgetLayerNormWeightsInfo,
705  &aclCellLayerNormWeightsInfo,
706  &aclOutputLayerNormWeightsInfo);
707  }
708 
709  // Need to be set at negative threshold to be compatible for ACL
710  float cell_threshold = descriptor.m_ClippingThresCell;
711  float projection_threshold = descriptor.m_ClippingThresProj;
712 
713  arm_compute::ActivationLayerInfo activationLayerInfo =
714  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
715 
716  for (unsigned int i = 0; i != maxTime; ++i)
717  {
718 
719  // Set LSTM input and output ITensors depending on:
720  // input format (timeMajor) & number of LSTM batches (maxTime).
721  arm_compute::ITensorInfo* outputLSTM;
722  arm_compute::ITensorInfo* inputLSTM;
723 
724  // If there is only one LSTM time major batch, we will not concat OR permute.
725  // Set input of LSTM to be first input ITensor.
726  // Set output of LSTM to be final output ITensor.
727  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
728  if (maxTime == 1 && descriptor.m_TimeMajor)
729  {
730  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
731  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
732 
733  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
734  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
735 
736  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
737  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
738 
739  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
740  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
741 
742  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
743  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
744  }
745  // If there is only one LSTM batch major batch, we will not concat, only permute.
746  // Set input of LSTM to be output of initial permute.
747  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
748  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
749  else if (maxTime == 1 && !descriptor.m_TimeMajor)
750  {
751  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
752  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
753  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
754  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
755  inputLSTM = &aclPermuteOutInfo;
756 
757  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
758  }
759  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
760  else
761  {
762  inputLSTM = splitterOutputsTensorInfosPtr[i];
763  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
764  }
765 
766  statusLSTM = arm_compute::NELSTMLayer::validate(inputLSTM,
767  &aclInputToForgetWeightsInfo,
768  &aclInputToCellWeightsInfo,
769  &aclInputToOutputWeightsInfo,
770  &aclRecurrentToForgetWeightsInfo,
771  &aclRecurrentToCellWeightsInfo,
772  &aclRecurrentToOutputWeightsInfo,
773  &aclForgetGateBiasInfo,
774  &aclCellBiasInfo,
775  &aclOutputGateBiasInfo,
776  &aclOutputStateInInfo,
777  &aclCellStateInInfo,
778  &aclScratchBufferInfo,
779  &aclOutputStateOutInfo,
780  &aclCellStateOutInfo,
781  outputLSTM,
782  lstm_params_info,
783  activationLayerInfo,
784  cell_threshold,
785  projection_threshold);
786 
787  if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
788  {
789  break;
790  }
791  }
792 
793  //
794  // Concat validate
795  //
796 
797  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
798  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
799  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
800  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
801 
802  TensorInfo concatOutputTensorInfo = TensorInfo(output);
803  concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
804  arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
805 
806  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
807  {
808  for (unsigned int i = 0; i < maxTime; ++i)
809  {
810  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
811  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
812  }
813 
814  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
815  if (!descriptor.m_TimeMajor)
816  {
817  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
818  &aclConcatOutputTensorInfo,
819  aclAxisConcat);
820  }
821  else
822  {
823  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
824  &aclOutputInfo,
825  aclAxisConcat);
826  }
827  }
828  // If only one LSTM batch, we do not concat and/or permute.
829  // Must ensure final output info is expanded to correct batch major dimensions.
830  else
831  {
832  if (!descriptor.m_TimeMajor)
833  {
834  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
835  BuildArmComputeTensorShape(shapeExpandBatchMajor));
836  }
837  else
838  {
839  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
840  BuildArmComputeTensorShape(shapeExpandTimeMajor));
841  }
842  }
843 
844  //
845  // Permute validate
846  //
847  if (!descriptor.m_TimeMajor)
848  {
849  // Output now time major. Permute output back to batch major.
850  if (maxTime != 1)
851  {
852  statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
853  &aclOutputInfo,
854  arm_compute::PermutationVector(0U, 2U, 1U));
855  }
856  else
857  {
858  statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
859  &aclOutputInfo,
860  arm_compute::PermutationVector(0U, 2U, 1U));
861  }
862  }
863 
864  auto okCode = arm_compute::ErrorCode::OK;
865  if (statusPermute1.error_code() == okCode &&
866  statusSplit.error_code() == okCode &&
867  statusLSTM.error_code() == okCode &&
868  statusConcat.error_code() == okCode &&
869  statusPermute2.error_code() == okCode)
870  {
871  return arm_compute::Status(arm_compute::ErrorCode::OK,
872  "All Unidirectional Sequence LSTM layer validate status OK.");
873  }
874  else
875  {
876  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
877  "Unidirectional Sequence LSTM layer validate status failed.");
878  }
879 }

◆ NeonUnidirectionalSequenceLstmWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const UnidirectionalSequenceLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 491 of file NeonUnidirectionalSequenceLstmWorkload.cpp.

References TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

499 {
500  TensorShape inputLayerShape = input.GetShape();
501  TensorShape outputLayerShape = output.GetShape();
502 
503  unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
504  unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
505  unsigned int inputSize = inputLayerShape[2];
506  unsigned int outputSize = outputLayerShape[2];
507 
508  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
509  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
510 
511  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
512  "Permute1 status");
513  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
514  "Split status");
515  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
516  "LSTM status");
517  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
518  "Concat status");
519  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
520  "Permute2 status");
521 
522  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
523  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
524 
525  //
526  // Permute validate
527  //
528  TensorInfo permuteOutInfo = TensorInfo(input);
529  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
530  if (!descriptor.m_TimeMajor)
531  {
532  statusPermute1 = arm_compute::NEPermute::validate(&aclInputInfo,
533  &aclPermuteOutInfo,
534  arm_compute::PermutationVector(0U, 2U, 1U));
535  }
536 
537  //
538  // Split and Concat Tensors validate
539  //
540  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
541  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
542  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
543  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
544  splitterOutputsTensorInfos.reserve(maxTime);
545  concatInputsTensorInfos.reserve(maxTime);
546  for (unsigned int i = 0; i < maxTime; ++i)
547  {
548  arm_compute::TensorInfo splitter_out;
549  arm_compute::TensorInfo concat_in;
550 
551  auto splitterTensorInfo = TensorInfo(input);
552  auto concatTensorInfo = TensorInfo(output);
553  splitterTensorInfo.SetShape({batchSize, inputSize});
554  concatTensorInfo.SetShape({batchSize, outputSize});
555 
556  arm_compute::TensorInfo aclSplitterTensorInfo
557  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
558  arm_compute::TensorInfo aclConcatTensorInfo
559  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
560 
561  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
562  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
563  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
564  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
565  }
566 
567  //
568  // Split validate
569  //
570  unsigned int numberDimensions = 3;
571  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
572  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
573 
574  if (maxTime != 1) // ACL split does not work with only one element to split.
575  {
576  if (!descriptor.m_TimeMajor)
577  {
578  statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
579  splitterOutputsTensorInfosPtr,
580  aclAxisSplit);
581  } else
582  {
583  statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
584  }
585  }
586 
587  //
588  // LSTM validate
589  //
590 
591  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
592 
593  const TensorInfo& scratchBuffer = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
594 
595  lstm_params_info.set_cell_clip_params(descriptor.m_ClippingThresCell);
596  lstm_params_info.set_projection_clip_params(descriptor.m_ClippingThresProj);
597  // The inputs and outputs
598  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
599  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
600  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
601  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
602  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
603 
604  // Basic parameters
605  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
606  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
607  const arm_compute::TensorInfo aclInputToCellWeightsInfo
608  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
609  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
610  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
611  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
612  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
613  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
614  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
615  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
616  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
617  const arm_compute::TensorInfo aclForgetGateBiasInfo
618  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
619  const arm_compute::TensorInfo aclCellBiasInfo
620  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
621  const arm_compute::TensorInfo aclOutputGateBiasInfo
622  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
623 
624  arm_compute::TensorInfo aclInputToInputWeightsInfo;
625  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
626  arm_compute::TensorInfo aclCellToInputWeightsInfo;
627  arm_compute::TensorInfo aclInputGateBiasInfo;
628  arm_compute::TensorInfo aclProjectionWeightsInfo;
629  arm_compute::TensorInfo aclProjectionBiasInfo;
630  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
631  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
632 
633  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
634  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
635  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
636  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
637 
638  if (!descriptor.m_CifgEnabled)
639  {
640  if (descriptor.m_PeepholeEnabled)
641  {
642  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
643  }
644  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
645  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
646  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
647 
648  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
649  &aclRecurrentToInputWeightsInfo,
650  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
651  &aclInputGateBiasInfo);
652  }
653 
654  if (descriptor.m_ProjectionEnabled)
655  {
656  if (paramsInfo.m_ProjectionBias != nullptr)
657  {
658  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
659  }
660  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
661 
662  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
663  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
664  }
665 
666  if (descriptor.m_PeepholeEnabled)
667  {
668  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
669  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
670 
671  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
672  }
673 
674  if (descriptor.m_LayerNormEnabled)
675  {
676  if (!descriptor.m_CifgEnabled)
677  {
678  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
679  }
680  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
681  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
682  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
683 
684  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
685  &aclInputLayerNormWeightsInfo,
686  &aclForgetLayerNormWeightsInfo,
687  &aclCellLayerNormWeightsInfo,
688  &aclOutputLayerNormWeightsInfo);
689  }
690 
691  lstm_params_info.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
692  descriptor.m_ForgetIntermediateScale,
693  descriptor.m_CellIntermediateScale,
694  descriptor.m_OutputIntermediateScale);
695 
696  lstm_params_info.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
697 
698  for (unsigned int i = 0; i != maxTime; ++i)
699  {
700 
701  // Set LSTM input and output ITensors depending on:
702  // input format (timeMajor) & number of LSTM batches (maxTime).
703  arm_compute::ITensorInfo* outputLSTM;
704  arm_compute::ITensorInfo* inputLSTM;
705 
706  // If there is only one LSTM time major batch, we will not concat OR permute.
707  // Set input of LSTM to be first input ITensor.
708  // Set output of LSTM to be final output ITensor.
709  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
710  if (maxTime == 1 && descriptor.m_TimeMajor)
711  {
712  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
713  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
714 
715  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
716  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
717 
718  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
719  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
720 
721  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
722  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
723 
724  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
725  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
726  }
727  // If there is only one LSTM batch major batch, we will not concat, only permute.
728  // Set input of LSTM to be output of initial permute.
729  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
730  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
731  else if (maxTime == 1 && !descriptor.m_TimeMajor)
732  {
733  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
734  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
735  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
736  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
737  inputLSTM = &aclPermuteOutInfo;
738 
739  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
740  }
741  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
742  else
743  {
744  inputLSTM = splitterOutputsTensorInfosPtr[i];
745  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
746  }
747 
748  statusLSTM = arm_compute::NEQLSTMLayer::validate(inputLSTM,
749  &aclInputToForgetWeightsInfo,
750  &aclInputToCellWeightsInfo,
751  &aclInputToOutputWeightsInfo,
752  &aclRecurrentToForgetWeightsInfo,
753  &aclRecurrentToCellWeightsInfo,
754  &aclRecurrentToOutputWeightsInfo,
755  &aclForgetGateBiasInfo,
756  &aclCellBiasInfo,
757  &aclOutputGateBiasInfo,
758  &aclCellStateInInfo,
759  &aclOutputStateInInfo,
760  &aclCellStateOutInfo,
761  &aclOutputStateOutInfo,
762  outputLSTM,
763  lstm_params_info);
764  }
765 
766  //
767  // Concat validate
768  //
769 
770  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
771  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
772  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
773  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
774 
775  TensorInfo concatOutputTensorInfo = TensorInfo(output);
776  concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
777  arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
778 
779  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
780  {
781  for (unsigned int i = 0; i < maxTime; ++i)
782  {
783  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
784  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
785  }
786 
787  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
788  if (!descriptor.m_TimeMajor)
789  {
790  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
791  &aclConcatOutputTensorInfo,
792  aclAxisConcat);
793  }
794  else
795  {
796  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
797  &aclOutputInfo,
798  aclAxisConcat);
799  }
800  }
801  // If only one LSTM batch, we do not concat and/or permute.
802  // Must ensure final output info is expanded to correct batch major dimensions.
803  else
804  {
805  if (!descriptor.m_TimeMajor)
806  {
807  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
808  BuildArmComputeTensorShape(shapeExpandBatchMajor));
809  }
810  else
811  {
812  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
813  BuildArmComputeTensorShape(shapeExpandTimeMajor));
814  }
815  }
816 
817  //
818  // Permute validate
819  //
820  if (!descriptor.m_TimeMajor)
821  {
822  // Output now time major. Permute output back to batch major.
823  if (maxTime != 1)
824  {
825  statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
826  &aclOutputInfo,
827  arm_compute::PermutationVector(0U, 2U, 1U));
828  }
829  else
830  {
831  statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
832  &aclOutputInfo,
833  arm_compute::PermutationVector(0U, 2U, 1U));
834  }
835  }
836 
837  auto okCode = arm_compute::ErrorCode::OK;
838  if (statusPermute1.error_code() == okCode &&
839  statusSplit.error_code() == okCode &&
840  statusLSTM .error_code() == okCode &&
841  statusConcat.error_code() == okCode &&
842  statusPermute2.error_code() == okCode)
843  {
844  return arm_compute::Status(arm_compute::ErrorCode::OK,
845  "All Unidirectional Sequence LSTM layer validate status OK.");
846  }
847  else
848  {
849  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
850  "Unidirectional Sequence LSTM layer validate status failed.");
851  }
852 }
Status
enumeration
Definition: Types.hpp:42
armnn::TensorShape GetTensorShape(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout)
Definition: TensorUtils.cpp:19
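As lines 503-504 above show, the validate function derives maxTime and batchSize from the 3D input shape according to m_TimeMajor. The following sketch is illustrative only (it is not part of the ArmNN sources) and shows how a caller might build an input TensorInfo for either layout; the QAsymmS8 data type is an assumption for this quantized Neon workload.

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

// Illustrative helper: build the 3D sequence-input TensorInfo this validate
// function expects. Time major puts the time axis first, batch major second.
armnn::TensorInfo MakeSequenceInputInfo(unsigned int maxTime,
                                        unsigned int batchSize,
                                        unsigned int inputSize,
                                        bool timeMajor)
{
    armnn::TensorShape shape = timeMajor
        ? armnn::TensorShape({maxTime, batchSize, inputSize})   // [maxTime, batchSize, inputSize]
        : armnn::TensorShape({batchSize, maxTime, inputSize});  // [batchSize, maxTime, inputSize]
    return armnn::TensorInfo(shape, armnn::DataType::QAsymmS8); // data type assumed for illustration
}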

◆ NextIndex()

bool armnn::NextIndex ( const unsigned int  numDims,
const armnn::TensorShape &  dims,
std::vector< unsigned int > &  current 
)

Definition at line 19 of file Reduce.cpp.

Referenced by Reduce().

20 {
21  unsigned int carry = 1;
22 
23  for (unsigned int idx = numDims; idx-- > 0; )
24  {
25  unsigned int current_val = current[idx] + carry;
26  if (dims[idx] == current_val)
27  {
28  current[idx] = 0;
29  }
30  else
31  {
32  current[idx] = current_val;
33  carry = 0;
34  break;
35  }
36  }
37  return (carry == 0);
38 }
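NextIndex() advances a multi-dimensional index like an odometer (last dimension fastest) and returns false once every position has been visited. A minimal usage sketch, assuming a 2x3 shape; this mirrors how Reduce() drives the helper but is not copied from the sources:

armnn::TensorShape dims({2, 3});
std::vector<unsigned int> current(2, 0);   // start at coordinates {0, 0}
do
{
    // visit the element at (current[0], current[1]) here
} while (armnn::NextIndex(2, dims, current));   // returns false once {1, 2} has been consumed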

◆ NonMaxSuppression()

std::vector< unsigned int > NonMaxSuppression ( unsigned int  numBoxes,
const std::vector< float > &  boxCorners,
const std::vector< float > &  scores,
float  nmsScoreThreshold,
unsigned int  maxDetection,
float  nmsIouThreshold 
)

Definition at line 49 of file DetectionPostProcess.cpp.

References GenerateRangeK(), IntersectionOverUnion(), numeric_cast(), and TopKSort().

Referenced by DetectionPostProcess(), and TEST_SUITE().

55 {
56  // Select boxes that have scores above a given threshold.
57  std::vector<float> scoresAboveThreshold;
58  std::vector<unsigned int> indicesAboveThreshold;
59  for (unsigned int i = 0; i < numBoxes; ++i)
60  {
61  if (scores[i] >= nmsScoreThreshold)
62  {
63  scoresAboveThreshold.push_back(scores[i]);
64  indicesAboveThreshold.push_back(i);
65  }
66  }
67 
68  // Sort the indices based on scores.
69  unsigned int numAboveThreshold = armnn::numeric_cast<unsigned int>(scoresAboveThreshold.size());
70  std::vector<unsigned int> sortedIndices = GenerateRangeK(numAboveThreshold);
71  TopKSort(numAboveThreshold, sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold);
72 
73  // Number of output cannot be more than max detections specified in the option.
74  unsigned int numOutput = std::min(maxDetection, numAboveThreshold);
75  std::vector<unsigned int> outputIndices;
76  std::vector<bool> visited(numAboveThreshold, false);
77 
78  // Prune out the boxes with high intersection over union by keeping the box with higher score.
79  for (unsigned int i = 0; i < numAboveThreshold; ++i)
80  {
81  if (outputIndices.size() >= numOutput)
82  {
83  break;
84  }
85  if (!visited[sortedIndices[i]])
86  {
87  outputIndices.push_back(indicesAboveThreshold[sortedIndices[i]]);
88  for (unsigned int j = i + 1; j < numAboveThreshold; ++j)
89  {
90  unsigned int iIndex = indicesAboveThreshold[sortedIndices[i]] * 4;
91  unsigned int jIndex = indicesAboveThreshold[sortedIndices[j]] * 4;
92  if (IntersectionOverUnion(&boxCorners[iIndex], &boxCorners[jIndex]) > nmsIouThreshold)
93  {
94  visited[sortedIndices[j]] = true;
95  }
96  }
97  }
98  }
99  return outputIndices;
100 }
float IntersectionOverUnion(const float *boxI, const float *boxJ)
std::vector< unsigned int > GenerateRangeK(unsigned int k)
void TopKSort(unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
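A small usage sketch (not taken from the sources): three candidate boxes, two of which overlap heavily. Box corners are packed as four floats per box; the corner ordering assumed here is [yMin, xMin, yMax, xMax], matching how DetectionPostProcess lays out decoded boxes.

std::vector<float> boxCorners = { 0.0f, 0.0f, 1.0f, 1.0f,    // box 0
                                  0.0f, 0.0f, 1.0f, 0.9f,    // box 1, ~90% overlap with box 0
                                  0.0f, 2.0f, 1.0f, 3.0f };  // box 2, disjoint
std::vector<float> scores = { 0.9f, 0.8f, 0.6f };

// Keep at most 10 boxes with score >= 0.5, suppressing overlaps with IoU > 0.5.
std::vector<unsigned int> kept =
    armnn::NonMaxSuppression(3, boxCorners, scores, 0.5f, 10, 0.5f);
// Expected result: kept == { 0, 2 } (box 1 is suppressed by the higher-scoring box 0).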

◆ numeric_cast() [1/9]

std::enable_if_t< std::is_unsigned<Source>::value && std::is_unsigned<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 35 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

Referenced by AllocateOutputData(), ArgMinMax(), armnnTfLiteParser::AsFloatArray(), CheckInferenceTimeThreshold(), ClArgMinMaxWorkload::ClArgMinMaxWorkload(), ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(), ClStridedSliceWorkload::ClStridedSliceWorkload(), ComputeReductionTensorShape(), armnnTfLiteParser::ComputeWrappedIndex(), OutputSlot::Connect(), CreateNetworkImpl< IParser >::Create(), OnnxParserImpl::CreateNetworkFromString(), DepthwiseConvolution2dAsymmetricTestImpl(), DepthwiseConvolution2dTestImpl(), DetectionPostProcess(), RefL2NormalizationWorkload::ExecuteAsync(), armnnUtils::ExpandDims(), FakeQuantization(), Gather(), MockCounterDirectory::GetCategoryCount(), MockCounterDirectory::GetCounterCount(), MockCounterDirectory::GetCounterSetCount(), MockCounterDirectory::GetDeviceCount(), IDeserializer::DeserializerImpl::GetNetworkOutputBindingInfo(), OutputSlot::GetNumConnections(), SubgraphView::GetNumInputSlots(), SubgraphView::GetNumOutputSlots(), StridedSliceDescriptor::GetStartForAxis(), StridedSliceDescriptor::GetStopForAxis(), GetStreamMetaDataPacketSize(), Cifar10Database::GetTestCaseData(), YoloDatabase::GetTestCaseData(), armnnUtils::GetUnsignedAxis(), RequestCountersPacketHandler::HandlePacket(), InferenceTestImage::InferenceTestImage(), PreluLayer::InferOutputShapes(), RefLayerSupport::IsMeanSupported(), LogSoftmax(), main(), LoadedNetwork::MakeLoadedNetwork(), NeonArgMinMaxWorkload::NeonArgMinMaxWorkload(), NeonSpaceToBatchNdWorkload::NeonSpaceToBatchNdWorkload(), NeonStridedSliceWorkload::NeonStridedSliceWorkload(), NonMaxSuppression(), ClassifierTestCaseProvider< TDatabase, InferenceModel >::OnInferenceTestFinished(), IDeserializer::DeserializerImpl::OutputShapeOfReshape(), TfLiteParserImpl::OutputShapeOfReshape(), ParseArray(), ParseDataArray< armnn::DataType::QAsymmS8 >(), ParseDataArray< armnn::DataType::QAsymmU8 >(), ParseDataArray< armnn::DataType::QSymmS8 >(), Pooling2d(), Pooling3d(), ClassifierTestCase< TTestCaseDatabase, TModel >::ProcessResult(), Reduce(), InferenceModel< IParser, TDataType >::Run(), InferenceModel< IParser, TDataType >::RunAsync(), ClContextSerializer::SaveSerializedToStream(), ISerializer::SerializerImpl::SaveSerializedToStream(), SimpleConvolution2dNhwcTestImpl(), SimpleConvolution2dTestImpl(), SimpleConvolution3dTestImpl(), InferenceTestImage::StbResize(), StridedSlice(), Graph::SubstituteSubgraph(), TEST_SUITE(), MeanQueueDescriptor::Validate(), ReduceLayer::ValidateTensorShapesFromInputs(), MeanLayer::ValidateTensorShapesFromInputs(), VerifyTimelineLabelBinaryPacketData(), and WorkingMemHandle::WorkingMemHandle().

36 {
37 #if ENABLE_NUMERIC_CAST_CHECKS
38  if (source > std::numeric_limits<Dest>::max())
39  {
40  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to "
41  "narrower unsigned type. Overflow detected.");
42  }
43 #endif // ENABLE_NUMERIC_CAST_CHECKS
44 
45  return static_cast<Dest>(source);
46 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25
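A short usage sketch: numeric_cast narrows between arithmetic types and, when ENABLE_NUMERIC_CAST_CHECKS is defined, asserts instead of silently wrapping. Values are illustrative only.

#include <armnn/utility/NumericCast.hpp>
#include <cstddef>
#include <cstdint>

size_t  elementCount = 200;
uint8_t fits = armnn::numeric_cast<uint8_t>(elementCount);   // OK: 200 fits in uint8_t

size_t  tooBig = 300;
// armnn::numeric_cast<uint8_t>(tooBig);  // triggers ARMNN_NUMERIC_CAST_CHECK when checks are
//                                        // enabled, otherwise behaves like a plain static_cast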

◆ numeric_cast() [2/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 58 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

59 {
60 #if ENABLE_NUMERIC_CAST_CHECKS
61  if (source > std::numeric_limits<Dest>::max())
62  {
63  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
64  "Overflow detected.");
65  }
66 
67  if (source < std::numeric_limits<Dest>::lowest())
68  {
69  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
70  "Underflow detected.");
71  }
72 #endif // ENABLE_NUMERIC_CAST_CHECKS
73 
74  return static_cast<Dest>(source);
75 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [3/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 83 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

84 {
85 #if ENABLE_NUMERIC_CAST_CHECKS
86  if (source > std::numeric_limits<Dest>::max())
87  {
88  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
89  "Overflow detected.");
90  }
91 
92  if (source < std::numeric_limits<Dest>::lowest())
93  {
94  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
95  "Underflow detected.");
96  }
97 #endif // ENABLE_NUMERIC_CAST_CHECKS
98 
99  return static_cast<Dest>(source);
100 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [4/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 109 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

110 {
111 #if ENABLE_NUMERIC_CAST_CHECKS
112  if (source > static_cast<Source>(std::numeric_limits<Dest>::max()))
113  {
114  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
115  "Overflow detected.");
116  }
117 
118  if (source < static_cast<Source>(std::numeric_limits<Dest>::lowest()))
119  {
120  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
121  "Underflow detected.");
122  }
123 #endif // ENABLE_NUMERIC_CAST_CHECKS
124 
125  return static_cast<Dest>(source);
126 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [5/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 135 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

136 {
137 #if ENABLE_NUMERIC_CAST_CHECKS
138  Dest sourceConverted = static_cast<Dest>(source);
139 
140  if (sourceConverted > std::numeric_limits<Dest>::max())
141  {
142  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
143  "Overflow detected.");
144  }
145 
146  if (sourceConverted < std::numeric_limits<Dest>::lowest())
147  {
148  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
149  "Underflow detected.");
150  }
151 #endif // ENABLE_NUMERIC_CAST_CHECKS
152 
153  return static_cast<Dest>(source);
154 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [6/9]

std::enable_if_t< std::is_signed<Dest>::value && std::is_integral<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 165 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

166 {
167 #if ENABLE_NUMERIC_CAST_CHECKS
168  if (sValue > static_cast< typename std::make_unsigned<Dest>::type >(std::numeric_limits<Dest>::max()))
169  {
170  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to signed type. "
171  "Overflow detected.");
172  }
173 #endif // ENABLE_NUMERIC_CAST_CHECKS
174 
175  return static_cast<Dest>(sValue);
176 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [7/9]

std::enable_if_t< std::is_floating_point<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 184 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

185 {
186 #if ENABLE_NUMERIC_CAST_CHECKS
187  if (static_cast<Dest>(sValue) > std::numeric_limits<Dest>::max())
188  {
189  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to floating point type. "
190  "Overflow detected.");
191  }
192 #endif // ENABLE_NUMERIC_CAST_CHECKS
193 
194  return static_cast<Dest>(sValue);
195 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [8/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_signed<Source>::value && std::is_integral<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 206 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

207 {
208 #if ENABLE_NUMERIC_CAST_CHECKS
209  if (sValue < 0)
210  {
211  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
212  "Underflow detected.");
213  }
214 
215  if (static_cast< typename std::make_unsigned<Source>::type >(sValue) > std::numeric_limits<Dest>::max())
216  {
217  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to unsigned type. "
218  "Overflow detected.");
219  }
220 #endif // ENABLE_NUMERIC_CAST_CHECKS
221  return static_cast<Dest>(sValue);
222 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [9/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_floating_point<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 230 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

231 {
232 #if ENABLE_NUMERIC_CAST_CHECKS
233  if (sValue < 0)
234  {
235  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
236  "Underflow detected.");
237  }
238 
239  if (sValue > static_cast<Source>(std::numeric_limits<Dest>::max()))
240  {
241  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to unsigned type. "
242  "Overflow detected.");
243  }
244 #endif // ENABLE_NUMERIC_CAST_CHECKS
245  return static_cast<Dest>(sValue);
246 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ Offset()

unsigned int armnn::Offset ( const TensorShape &  shape,
unsigned int  batch,
unsigned int  height,
unsigned int  width,
unsigned int  channels,
const DataLayoutIndexed &  dataLayout 
)
inline

Definition at line 19 of file BatchToSpaceNd.cpp.

References DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), and NHWC.

Referenced by BatchToSpaceNd().

21 {
22  if (dataLayout.GetDataLayout() == DataLayout::NHWC)
23  {
24  return ((batch * shape[dataLayout.GetHeightIndex()] + height) * shape[dataLayout.GetWidthIndex()] + width) *
25  shape[dataLayout.GetChannelsIndex()] + channels;
26  }
27  else
28  {
29  return ((batch * shape[dataLayout.GetChannelsIndex()] + channels) *
30  shape[dataLayout.GetHeightIndex()] + height) *
31  shape[dataLayout.GetWidthIndex()] + width;
32  }
33 }
unsigned int GetWidthIndex() const
unsigned int GetHeightIndex() const
armnn::DataLayout GetDataLayout() const
unsigned int GetChannelsIndex() const
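Offset() is an internal helper of the reference BatchToSpaceNd implementation, but the arithmetic is easy to check by hand. A sketch of the NHWC case, with values chosen purely for illustration:

armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
armnn::TensorShape shape({2, 4, 5, 8});                       // [N, H, W, C]
unsigned int flatIndex = armnn::Offset(shape, /*batch*/ 1, /*height*/ 2, /*width*/ 0, /*channels*/ 3, nhwc);
// NHWC: ((1 * 4 + 2) * 5 + 0) * 8 + 3 = 243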

◆ operator<<() [1/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const std::vector< Compute > &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 47 of file BackendId.hpp.

References GetComputeDeviceAsCString().

48 {
49  for (const Compute& comp : compute)
50  {
51  os << GetComputeDeviceAsCString(comp) << " ";
52  }
53  return os;
54 }
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
Definition: BackendId.hpp:21
constexpr char const * GetComputeDeviceAsCString(Compute compute)
Deprecated function that will be removed together with the Compute enum.
Definition: BackendId.hpp:34

◆ operator<<() [2/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const std::set< Compute > &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 58 of file BackendId.hpp.

References GetComputeDeviceAsCString().

59 {
60  for (const Compute& comp : compute)
61  {
62  os << GetComputeDeviceAsCString(comp) << " ";
63  }
64  return os;
65 }
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
Definition: BackendId.hpp:21
constexpr char const * GetComputeDeviceAsCString(Compute compute)
Deprecated function that will be removed together with the Compute enum.
Definition: BackendId.hpp:34

◆ operator<<() [3/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BackendVersion &  backendVersion 
)
inline

Definition at line 68 of file IBackendInternal.hpp.

References BackendVersion::m_Major, and BackendVersion::m_Minor.

69 {
70  os << "[" << backendVersion.m_Major << "." << backendVersion.m_Minor << "]";
71 
72  return os;
73 }

◆ operator<<() [4/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const Compute &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 69 of file BackendId.hpp.

References GetComputeDeviceAsCString().

70 {
71  os << GetComputeDeviceAsCString(compute);
72  return os;
73 }
constexpr char const * GetComputeDeviceAsCString(Compute compute)
Deprecated function that will be removed together with the Compute enum.
Definition: BackendId.hpp:34

◆ operator<<() [5/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BFloat16 &  b 
)
inline

Definition at line 122 of file BFloat16.hpp.

References BFloat16::ToFloat32(), and BFloat16::Val().

123 {
124  os << b.ToFloat32() << "(0x" << std::hex << b.Val() << ")";
125  return os;
126 }

◆ operator<<() [6/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BackendId &  id 
)
inline

Definition at line 176 of file BackendId.hpp.

177 {
178  os << id.Get();
179  return os;
180 }

◆ operator<<() [7/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const TContainer< BackendId, TContainerTemplateArgs... > &  ids 
)

Definition at line 183 of file BackendId.hpp.

185 {
186  os << '[';
187  for (const auto& id : ids) { os << id << " "; }
188  os << ']';
189  return os;
190 }

◆ operator<<() [8/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
Status  stat 
)
inline

Definition at line 297 of file TypesUtils.hpp.

References GetStatusAsCString().

298 {
299  os << GetStatusAsCString(stat);
300  return os;
301 }
constexpr char const * GetStatusAsCString(Status status)
Definition: TypesUtils.hpp:17

◆ operator<<() [9/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const armnn::TensorShape &  shape 
)
inline

Definition at line 304 of file TypesUtils.hpp.

References Dequantize, TensorShape::GetNumDimensions(), and Quantize.

305 {
306  os << "[";
307  for (uint32_t i=0; i<shape.GetNumDimensions(); ++i)
308  {
309  if (i!=0)
310  {
311  os << ",";
312  }
313  os << shape[i];
314  }
315  os << "]";
316  return os;
317 }
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
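Usage sketch: streaming a shape produces a bracketed, comma-separated list, which is convenient for log messages.

#include <armnn/Tensor.hpp>
#include <iostream>

armnn::TensorShape shape({1, 224, 224, 3});
std::cout << shape << std::endl;   // prints: [1,224,224,3]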

◆ operator>>() [1/2]

std::istream& armnn::operator>> ( std::istream &  in,
armnn::Compute &  compute 
)
inline

Definition at line 23 of file InferenceTest.hpp.

References ParseComputeDevice(), and Undefined.

24 {
25  std::string token;
26  in >> token;
27  compute = armnn::ParseComputeDevice(token.c_str());
28  if (compute == armnn::Compute::Undefined)
29  {
30  in.setstate(std::ios_base::failbit);
31  throw cxxopts::OptionException(fmt::format("Unrecognised compute device: {}", token));
32  }
33  return in;
34 }
constexpr armnn::Compute ParseComputeDevice(const char *str)
Deprecated function that will be removed together with the Compute enum.
Definition: TypesUtils.hpp:182

◆ operator>>() [2/2]

std::istream& armnn::operator>> ( std::istream &  in,
armnn::BackendId &  backend 
)
inline

Definition at line 36 of file InferenceTest.hpp.

References ParseComputeDevice(), and Undefined.

37 {
38  std::string token;
39  in >> token;
40  armnn::Compute compute = armnn::ParseComputeDevice(token.c_str());
41  if (compute == armnn::Compute::Undefined)
42  {
43  in.setstate(std::ios_base::failbit);
44  throw cxxopts::OptionException(fmt::format("Unrecognised compute device: {}", token));
45  }
46  backend = compute;
47  return in;
48 }
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
Definition: BackendId.hpp:21
constexpr armnn::Compute ParseComputeDevice(const char *str)
Deprecated function that will be removed together with the Compute enum.
Definition: TypesUtils.hpp:182

◆ Optimize() [1/2]

IOptimizedNetworkPtr Optimize ( const INetwork &  network,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptions &  options = OptimizerOptions(),
Optional< std::vector< std::string > &>  messages = EmptyOptional() 
)

Create an optimized version of the network.

Parameters
network: INetwork description of the network to be optimized.
backendPreferences: The choice of the backend ordered by user preferences.
deviceSpec: DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
messages: If there are failures or warnings a string describing same will be added to the vector
options: OptimizerOptions object with optimizer configuration options
Returns
An IOptimizedNetworkPtr interface to the optimized network, throws an exception derived from armnn::Exception if process fails.
Examples:
AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, DynamicSample.cpp, and SimpleSample.cpp.

Definition at line 1847 of file Network.cpp.

References BackendOptions::Var::AsBool(), IOptimizedNetwork::Optimize, ParseOptions(), and INetwork::pNetworkImpl.

Referenced by armnn::experimental::AsyncEndToEndTestImpl(), armnn::experimental::AsyncThreadedEndToEndTestImpl(), GetSoftmaxProfilerJson(), InferenceModel< IParser, TDataType >::InferenceModel(), ParserFlatbuffersFixture::loadNetwork(), main(), QLstmEndToEnd(), QuantizedLstmEndToEnd(), ParserPrototxtFixture< TParser >::Setup(), ParserFlatbuffersSerializeFixture::Setup(), ParserPrototxtFixture< TParser >::SetupOptimizedNetwork(), TEST_CASE_FIXTURE(), TEST_SUITE(), VerifyPostOptimisationStructureTestImpl(), and IMemoryOptimizerStrategy::~IMemoryOptimizerStrategy().

1852 {
1853  return Optimize(inNetwork.pNetworkImpl->GetGraph(),
1854  backendPreferences,
1855  deviceSpec,
1856  options,
1857  messages);
1858 }
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1847
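A condensed usage sketch of the typical call site, along the lines of the samples listed above (SimpleSample.cpp and friends); error handling and layer construction are omitted, and the backend list is only an example.

#include <armnn/ArmNN.hpp>

using namespace armnn;

IRuntime::CreationOptions runtimeOptions;
IRuntimePtr runtime = IRuntime::Create(runtimeOptions);

INetworkPtr network = INetwork::Create();
// ... add input, hidden and output layers to 'network' here ...

std::vector<BackendId> backends = { Compute::CpuAcc, Compute::CpuRef };   // preferred order
std::vector<std::string> messages;                                        // collects warnings/errors
IOptimizedNetworkPtr optNet = Optimize(*network,
                                       backends,
                                       runtime->GetDeviceSpec(),
                                       OptimizerOptions(),
                                       Optional<std::vector<std::string>&>(messages));

NetworkId networkId;
runtime->LoadNetwork(networkId, std::move(optNet));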

◆ Optimize() [2/2]

IOptimizedNetworkPtr Optimize ( const Graph &  inGraph,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptions &  options,
Optional< std::vector< std::string > &>  messages = EmptyOptional() 
)

Create an optimized version of the network.

Parameters
inGraph: Graph to be optimized.
backendPreferences: The choice of the backend ordered by user preferences.
deviceSpec: DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
messages: If there are failures or warnings a string describing same will be added to the vector
options: OptimizerOptions object with optimizer configuration options
Returns
An IOptimizedNetworkPtr interface to the optimized network, throws an exception derived from armnn::Exception if process fails.

Definition at line 1670 of file Network.cpp.

References Graph::AddCompatibilityLayers(), ApplyBackendOptimizations(), ARMNN_LOG, ARMNN_SCOPED_PROFILING_EVENT, AssignBackends(), Graph::begin(), CreateSupportedBackends(), debug, IOptimizedNetwork::Destroy(), Graph::end(), BackendSettings::GetAvailablePreferredBackends(), ProfilerManager::GetInstance(), Graph::GetProfiler(), InferAndValidate, Graph::InferTensorInfos(), IOptimizedNetwork::IOptimizedNetwork(), OptimizerOptions::m_Debug, OptimizationResult::m_Error, OptimizerOptions::m_ImportEnabled, OptimizerOptions::m_ModelOptions, OptimizerOptions::m_ProfilingEnabled, OptimizerOptions::m_ReduceFp32ToBf16, OptimizerOptions::m_ReduceFp32ToFp16, OptimizerOptions::m_shapeInferenceMethod, BackendSettings::m_SupportedBackends, MakeOptimizations(), Optimizer::Pass(), IOptimizedNetwork::pOptimizedNetworkImpl, ProfilerManager::RegisterProfiler(), ReportError(), SelectTensorHandleStrategy(), OptimizerOptions::ToString(), Undefined, ValidateOnly, and Graph::VerifyConstantLayerSetTensorInfo().

1675 {
1676  ARMNN_LOG(debug) << options.ToString();
1677 
1678  // Enable profiling
1679  auto profiler = inGraph.GetProfiler();
1680  ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
1681  profiler->EnableProfiling(options.m_ProfilingEnabled);
1682 
1683  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer");
1684  if (backendPreferences.empty())
1685  {
1686  throw InvalidArgumentException("Invoked Optimize with no backends specified");
1687  }
1688 
1689  if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
1690  {
1691  throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
1692  }
1693 
1694  // Ensure TensorInfo is set on all output slots of ConstantLayers in the graph
1695  inGraph.VerifyConstantLayerSetTensorInfo();
1696 
1697  std::unique_ptr<Graph> graph = std::make_unique<Graph>(inGraph);
1698 
1699  auto optNet = IOptimizedNetworkPtr(new IOptimizedNetwork(std::move(graph), options.m_ModelOptions),
1700  &IOptimizedNetwork::Destroy);
1701 
1702  IOptimizedNetwork* optNetObjPtr = optNet.get();
1703 
1704  // Get the optimized graph
1705  Graph& optGraph = optNetObjPtr->pOptimizedNetworkImpl->GetGraph();
1706 
1707  if(options.m_shapeInferenceMethod == ShapeInferenceMethod::InferAndValidate)
1708  {
1709  // Infer the tensor infos for all output slots. Throws an exception on failure
1710  optGraph.InferTensorInfos();
1711  }
1712 
1713  // Perform AddBroadcastReshapeLayer optimisation
1714  using namespace optimizations;
1715  Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));
1716 
1717  if(options.m_shapeInferenceMethod == ShapeInferenceMethod::ValidateOnly)
1718  {
1719  // Validate the tensor infos for all output slots. Throws an exception on failure
1720  optGraph.InferTensorInfos();
1721  }
1722 
1723  // Need to FusePermuteIntoConstantLayer before FoldPadIntoDepthwiseConvolution2d or
1724  // FuseBatchNormIntoDepthwiseConvolution2D optimizations are called.
1725  Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer()));
1726 
1727  // Perform optimisation passes
1728  Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
1733  MovePermuteUp(),
1734  MoveTransposeUp(),
1735  PermuteAsReshape(),
1748 
1749  // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
1750  if (options.m_ReduceFp32ToFp16)
1751  {
1752  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
1753  Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
1754  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
1755  }
1756 
1757  // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
1758  // Convert input of Convolution2d and FullyConnected from Fp32 to Bf16
1759  // Only Constant weight of Convolution2d and FullyConnected are converted from Fp32 to Bf16
1760  if (options.m_ReduceFp32ToBf16)
1761  {
1762  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToBf16");
1763  Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
1764  }
1765 
1766  // Initialize backend settings
1767  BackendSettings backendSettings(backendPreferences, deviceSpec);
1768  if (backendSettings.GetAvailablePreferredBackends().empty())
1769  {
1770  std::stringstream failureMsg;
1771  failureMsg << "None of the preferred backends " << backendPreferences
1772  << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
1773  ReportError(failureMsg.str(), messages);
1774  throw InvalidArgumentException(failureMsg.str());
1775  }
1776 
1777  // Create a map to temporarily hold initialized backend objects
1778  TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
1779  BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
1780 
1781  // Assign an available backend to each layer
1782  Graph::Iterator firstLayer = optGraph.begin();
1783  Graph::Iterator lastLayer = optGraph.end();
1784  OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr->pOptimizedNetworkImpl.get(),
1785  backendSettings,
1786  firstLayer,
1787  lastLayer,
1788  messages);
1789  if (assignBackendsResult.m_Error)
1790  {
1791  // Failed to assign a backend to each layer
1792  throw InvalidArgumentException("Failed to assign a backend to each layer");
1793  }
1794 
1795  Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
1796  OptimizeInverseConversionsFp32()));
1797 
1798  // Apply the backend-specific optimizations
1799  OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr->pOptimizedNetworkImpl.get(),
1800  backendSettings,
1801  backends,
1802  options.m_ModelOptions,
1803  messages);
1804  if (backendOptimizationResult.m_Error)
1805  {
1806  // Failed to apply the backend-specific optimizations
1807  throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
1808  }
1809 
1810  // If the debug flag is set, then insert a DebugLayer after each layer
1811  // Doing this after applying the backend optimizations as they might have changed some layers
1812  if (options.m_Debug)
1813  {
1814  Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
1815  }
1816 
1817  // Calculate the compatibility strategies for tensor handles
1818  OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
1819  backends,
1820  tensorHandleFactoryRegistry,
1821  options.m_ImportEnabled,
1822  messages);
1823  if (strategyResult.m_Error)
1824  {
1825  // Failed to apply the backend-specific optimizations
1826  return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
1827  }
1828 
1829  // Based on the tensor handle strategy determined above, insert copy layers where required.
1830  {
1831  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AddCompatibilityLayers");
1832  optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
1833  }
1834 
1835  // Convert constants
1836  {
1837  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ConvertConstants");
1838  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
1839  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
1840 
1841  // Once the constants are converted we can now safely call RedirectMembersToConstantInputs
1842  Optimizer::Pass(optGraph, MakeOptimizations(RedirectMembersToConstantInputs()));
1843  }
1844  return optNet;
1845 }
OptimizeForConnection< Layer, PermuteLayer, SquashEqualSiblingsImpl< PermuteLayer > > SquashEqualPermuteSiblings
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
OptimizeForConnection< PermuteLayer, PermuteLayer, OptimizeInversePermutesImpl< PermuteLayer > > OptimizeInversePermutes
OptimizeForExclusiveConnection< PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl > FoldPadIntoConvolution2d
Optimizer::Optimizations MakeOptimizations(Args &&... args)
Definition: Optimizer.hpp:43
OptimizeForConnection< TransposeLayer, TransposeLayer, OptimizeInversePermutesImpl< TransposeLayer > > OptimizeInverseTransposes
OptimizeForExclusiveConnection< PadLayer, DepthwiseConvolution2dLayer, pad_fold::FoldPadIntoDepthwiseConvolution2dImpl > FoldPadIntoDepthwiseConvolution2d
OptimizeForConnection< TransposeLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl< TransposeLayer > > TransposeAndBatchToSpaceAsDepthToSpace
OptimizeForExclusiveConnection< DepthwiseConvolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< DepthwiseConvolution2dLayer, armnn::DataType::Float32 > > FuseBatchNormIntoDepthwiseConvolution2DFloat32
OptimizeForExclusiveConnection< DepthwiseConvolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< DepthwiseConvolution2dLayer, armnn::DataType::Float16 > > FuseBatchNormIntoDepthwiseConvolution2DFloat16
OptimizeForType< Layer, RedirectMembersToConstantInputsImpl > RedirectMembersToConstantInputs
OptimizeForExclusiveConnection< Convolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< Convolution2dLayer, armnn::DataType::Float16 > > FuseBatchNormIntoConvolution2DFloat16
OptimizeForExclusiveConnection< Convolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< Convolution2dLayer, armnn::DataType::Float32 > > FuseBatchNormIntoConvolution2DFloat32
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205
OptimizeForConnection< Layer, ReshapeLayer, SquashEqualSiblingsImpl< ReshapeLayer > > SquashEqualReshapeSiblings
OptimizeForConnection< Layer, TransposeLayer, MoveTransposeUpImpl > MoveTransposeUp
OptimizeForType< Layer, AddDebugImpl > InsertDebugLayer
Definition: AddDebug.hpp:34
OptimizeForConnection< ReshapeLayer, ReshapeLayer, OptimizeConsecutiveReshapesImpl > OptimizeConsecutiveReshapes
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
OptimizeForConnection< ConvertFp16ToFp32Layer, ConvertFp32ToFp16Layer, OptimizeInverseConversionsImpl > OptimizeInverseConversionsFp16
OptimizeForConnection< ConstantLayer, PermuteLayer, ConvertConstPermuteLayersToConstLayers > FusePermuteIntoConstLayer
OptimizeForConnection< PermuteLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl< PermuteLayer > > PermuteAndBatchToSpaceAsDepthToSpace
OptimizeForConnection< Layer, PermuteLayer, MovePermuteUpImpl > MovePermuteUp
ConvertConstants< Float32ToFloat16, IsFloat16Layer > ConvertConstantsFloatToHalf
OptimizeForType< TransposeLayer, TransposeAsReshapeImpl > TransposeAsReshape
OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1139
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:242
OptimizeForType< PermuteLayer, PermuteAsReshapeImpl > PermuteAsReshape
OptimizeForConnection< Layer, TransposeLayer, SquashEqualSiblingsImpl< TransposeLayer > > SquashEqualTransposeSiblings
ConvertConstants< Float16ToFloat32, IsFloat32Layer > ConvertConstantsHalfToFloat
BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)
Definition: Network.cpp:1120
OptimizeForConnection< ConvertFp32ToFp16Layer, ConvertFp16ToFp32Layer, OptimizeInverseConversionsImpl > OptimizeInverseConversionsFp32
OptimizeForExclusiveConnection< PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl > FoldPadIntoPooling2d
OptimizeForType< Layer, ConvertFp32NetworkToFp16Impl > Fp32NetworkToFp16Converter
OptimizationResult SelectTensorHandleStrategy(Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1601
OptimizeForType< Layer, AddBroadcastReshapeLayerImpl > AddBroadcastReshapeLayer
OptimizeForConnection< ConstantLayer, DequantizeLayer, ConvertConstDequantisationLayersToConstLayersImpl > ConvertConstDequantisationLayersToConstLayers
OptimizeForType< Layer, ConvertFp32NetworkToBf16Impl > Fp32NetworkToBf16Converter
OptimizationResult AssignBackends(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1106
std::map< BackendId, std::unique_ptr< class IBackendInternal > > BackendsMap
Definition: Network.hpp:294
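The flags consulted above all live on OptimizerOptions. A sketch of setting them explicitly before calling either Optimize() overload; the values are examples, not recommendations.

armnn::OptimizerOptions options;
options.m_ReduceFp32ToFp16     = true;    // enables the Fp32 -> Fp16 conversion passes
options.m_ReduceFp32ToBf16     = false;   // must not be enabled together with the Fp16 reduction
options.m_Debug                = false;   // true inserts a DebugLayer after every layer
options.m_ImportEnabled        = false;   // feeds into SelectTensorHandleStrategy()
options.m_ProfilingEnabled     = true;    // registered with the ProfilerManager at the top of Optimize()
options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly;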

◆ Pad()

void Pad ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const ITensorHandle *  inputHandle,
ITensorHandle *  outputHandle,
const PadQueueDescriptor &  data 
)

Definition at line 39 of file Pad.cpp.

References Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), PadDescriptor::m_PadList, PadDescriptor::m_PadValue, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

44 {
45  auto padList = data.m_Parameters.m_PadList;
46  auto padValue = data.m_Parameters.m_PadValue;
47 
48  unsigned int numOutputElements = outputInfo.GetNumElements();
49 
50  TensorShape outputShape = outputInfo.GetShape();
51  TensorShape inputShape = inputInfo.GetShape();
52 
53  unsigned int numInputDimensions = inputShape.GetNumDimensions();
54 
55 #ifndef NDEBUG
56 
57  unsigned int numOutputDimensions = outputShape.GetNumDimensions();
58  assert(numInputDimensions == numOutputDimensions);
59 
60 #endif
61 
62  unsigned int inputBatches = 0;
63  unsigned int inputChannels = 0;
64  unsigned int inputHeight = 0;
65  unsigned int inputWidth = 0;
66 
67  unsigned int outputChannels = 0;
68  unsigned int outputHeight = 0;
69  unsigned int outputWidth = 0;
70 
71  auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
72  auto outData = MakeEncoder<float>(outputInfo, outputHandle->Map());
73 
74  // Fill the output tensor with Pad value first
75  if (outputInfo.IsQuantized())
76  {
77  // For Quantized types Pad Value should not be quantized with scale and offset of the tensor info
78  auto temporaryInfo = TensorInfo(outputInfo.GetShape(), outputInfo.GetDataType(), 1.0f, 0);
79  auto outputData = MakeEncoder<float>(temporaryInfo, outputHandle->Map());
80  FillOutputWithPadValue(*outputData, padValue, numOutputElements);
81  }
82  else
83  {
84  FillOutputWithPadValue(*outData, padValue, numOutputElements);
85  }
86 
87  Decoder<float>& input = *inputData;
88  Encoder<float>& output = *outData;
89 
90  switch(numInputDimensions) {
91 
92  case 1:
93  inputWidth = inputShape[0];
94  for (unsigned int w = 0; w < inputWidth ; w++)
95  {
96  input[w];
97  auto inputValue = input.Get();
98  auto outputIndex = w + std::get<0>(padList[0]);
99  output[outputIndex];
100  output.Set(inputValue);
101  }
102 
103  break;
104  case 2 :
105  inputHeight = inputShape[0];
106  inputWidth = inputShape[1];
107  outputWidth = outputShape[1];
108 
109  for (unsigned int h = 0; h < inputHeight; h++)
110  {
111  for (unsigned int w = 0; w < inputWidth ; w++)
112  {
113  input[h * inputWidth + w];
114  auto inputValue = input.Get();
115  auto outputIndex = (h + std::get<0>(padList[0])) * outputWidth + (w + std::get<0>(padList[1]));
116  output[outputIndex];
117  output.Set(inputValue);
118  }
119  }
120 
121  break;
122  case 3 :
123  inputChannels = inputShape[0];
124  inputHeight = inputShape[1];
125  inputWidth = inputShape[2];
126  outputHeight = outputShape[1];
127  outputWidth = outputShape[2];
128 
129  for (unsigned int c = 0; c < inputChannels; c++)
130  {
131  for (unsigned int h = 0; h < inputHeight; h++)
132  {
133  for (unsigned int w = 0; w < inputWidth ; w++)
134  {
135  input[c * inputHeight * inputWidth + h * inputWidth + w];
136  auto inputValue = input.Get();
137  auto outputIndex = (c + std::get<0>(padList[0])) * outputHeight * outputWidth
138  + (h + std::get<0>(padList[1])) * outputWidth
139  + (w + std::get<0>(padList[2]));
140  output[outputIndex];
141  output.Set(inputValue);
142  }
143  }
144  }
145 
146  break;
147  case 4 :
148  inputBatches = inputShape[0];
149  inputChannels = inputShape[1];
150  inputHeight = inputShape[2];
151  inputWidth = inputShape[3];
152  outputChannels = outputShape[1];
153  outputHeight = outputShape[2];
154  outputWidth = outputShape[3];
155 
156  for (unsigned int b = 0; b < inputBatches; b++)
157  {
158  for (unsigned int c = 0; c < inputChannels; c++)
159  {
160  for (unsigned int h = 0; h < inputHeight; h++)
161  {
162  for (unsigned int w = 0; w < inputWidth ; w++)
163  {
164  input[b * inputChannels * inputHeight * inputWidth
165  + c * inputHeight * inputWidth
166  + h * inputWidth
167  + w];
168  auto inputValue = input.Get();
169  auto outputIndex = (b + std::get<0>(padList[0]))
170  * outputChannels * outputHeight * outputWidth
171  + (c + std::get<0>(padList[1])) * outputHeight * outputWidth
172  + (h + std::get<0>(padList[2])) * outputWidth
173  + (w + std::get<0>(padList[3]));
174  output[outputIndex];
175  output.Set(inputValue);
176  }
177  }
178  }
179  }
180 
181  break;
182  default :
183  break;
184  }
185 }
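The loops above shift each copied element by the leading pad of every dimension (std::get<0> of each pair in m_PadList) after pre-filling the output with m_PadValue. A small descriptor sketch for the 2D case, with illustrative values:

armnn::PadDescriptor padDesc;
padDesc.m_PadList  = { {1, 1}, {2, 2} };   // (padBefore, padAfter) per dimension
padDesc.m_PadValue = 0.0f;                 // used to pre-fill the output tensor
// A [2, 3] input padded this way produces a [4, 7] output, with the input block
// starting at row 1, column 2.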

◆ ParseBoolean()

bool armnn::ParseBoolean ( const BackendOptions::Var &  value,
bool  defaultValue 
)

Definition at line 97 of file ClBackendContext.cpp.

References BackendOptions::Var::AsBool(), and BackendOptions::Var::IsBool().

98 {
99  if (value.IsBool())
100  {
101  return value.AsBool();
102  }
103  return defaultValue;
104 }

◆ ParseBooleanBackendOption()

bool armnn::ParseBooleanBackendOption ( const armnn::BackendOptions::Var &  value,
bool  defaultValue 
)
inline

Definition at line 312 of file BackendOptions.hpp.

References BackendOptions::Var::AsBool(), and BackendOptions::Var::IsBool().

313 {
314  if (value.IsBool())
315  {
316  return value.AsBool();
317  }
318  return defaultValue;
319 }
bool AsBool() const
Value getters.
bool IsBool() const
Type getters.

◆ ParseComputeDevice()

constexpr armnn::Compute armnn::ParseComputeDevice ( const char *  str)

Deprecated function that will be removed together with the Compute enum.

Definition at line 182 of file TypesUtils.hpp.

References CpuAcc, CpuRef, GpuAcc, StrEqual(), and Undefined.

Referenced by operator>>().

183 {
184  if (armnn::StrEqual(str, "CpuAcc"))
185  {
186  return armnn::Compute::CpuAcc;
187  }
188  else if (armnn::StrEqual(str, "CpuRef"))
189  {
190  return armnn::Compute::CpuRef;
191  }
192  else if (armnn::StrEqual(str, "GpuAcc"))
193  {
194  return armnn::Compute::GpuAcc;
195  }
196  else
197  {
198  return armnn::Compute::Undefined;
199  }
200 }
CPU Execution: Reference C++ kernels.
constexpr bool StrEqual(const char *strA, const char(&strB)[N])
Definition: TypesUtils.hpp:170
GPU Execution: OpenCL: ArmCompute.
CPU Execution: NEON: ArmCompute.

◆ ParseFile()

std::string armnn::ParseFile ( const BackendOptions::Var &  value,
std::string  defaultValue 
)

Definition at line 106 of file ClBackendContext.cpp.

References BackendOptions::Var::AsString(), and BackendOptions::Var::IsString().

Referenced by ClBackendContext::ClBackendContext(), and ClBackendModelContext::ClBackendModelContext().

107 {
108  if (value.IsString())
109  {
110  return value.AsString();
111  }
112  return defaultValue;
113 }

◆ ParseIntBackendOption()

int armnn::ParseIntBackendOption ( const armnn::BackendOptions::Var &  value,
int  defaultValue 
)
inline

Definition at line 330 of file BackendOptions.hpp.

References BackendOptions::Var::AsInt(), and BackendOptions::Var::IsInt().

Referenced by ClBackendModelContext::ClBackendModelContext().

331 {
332  if (value.IsInt())
333  {
334  return value.AsInt();
335  }
336  return defaultValue;
337 }

◆ ParseOptions()

void armnn::ParseOptions ( const std::vector< BackendOptions > &  options,
BackendId  backend,
f 
)

Definition at line 297 of file BackendOptions.hpp.

References BackendOptions::BackendOption::GetName(), and BackendOptions::BackendOption::GetValue().

Referenced by ClBackendContext::ClBackendContext(), ClBackendModelContext::ClBackendModelContext(), NeonBackendModelContext::NeonBackendModelContext(), Optimize(), and RuntimeImpl::RuntimeImpl().

298 {
299  for (auto optionsGroup : options)
300  {
301  if (optionsGroup.GetBackendId() == backend)
302  {
303  for (size_t i=0; i < optionsGroup.GetOptionCount(); i++)
304  {
305  const BackendOptions::BackendOption option = optionsGroup.GetOption(i);
306  f(option.GetName(), option.GetValue());
307  }
308  }
309  }
310 }
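A usage sketch modelled on how ClBackendContext consumes GpuAcc options; "TuningLevel" and "KernelProfilingEnabled" are real GpuAcc backend options, while the handler body is illustrative.

std::vector<armnn::BackendOptions> backendOptions;
armnn::BackendOptions gpuAcc("GpuAcc");
gpuAcc.AddOption({"TuningLevel", 2});
gpuAcc.AddOption({"KernelProfilingEnabled", true});
backendOptions.push_back(gpuAcc);

armnn::ParseOptions(backendOptions, "GpuAcc",
    [](const std::string& name, const armnn::BackendOptions::Var& value)
    {
        if (name == "KernelProfilingEnabled")
        {
            bool enabled = armnn::ParseBooleanBackendOption(value, false);
            // ... act on 'enabled' ...
        }
    });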

◆ ParseStringBackendOption()

std::string armnn::ParseStringBackendOption ( const armnn::BackendOptions::Var &  value,
std::string  defaultValue 
)
inline

Definition at line 321 of file BackendOptions.hpp.

References BackendOptions::Var::AsString(), and BackendOptions::Var::IsString().

322 {
323  if (value.IsString())
324  {
325  return value.AsString();
326  }
327  return defaultValue;
328 }
std::string AsString() const

◆ ParseTuningLevel()

TuningLevel armnn::ParseTuningLevel ( const BackendOptions::Var &  value,
TuningLevel  defaultValue 
)

Definition at line 79 of file ClBackendContext.cpp.

References ARMNN_LOG, BackendOptions::Var::AsInt(), Exhaustive, BackendOptions::Var::IsInt(), None, and warning.

Referenced by ClBackendContext::ClBackendContext().

80 {
81  if (value.IsInt())
82  {
83  int v = value.AsInt();
84  if (v > static_cast<int>(TuningLevel::Exhaustive) ||
85  v < static_cast<int>(TuningLevel::None))
86  {
87  ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. "
88  "Using default(" << static_cast<int>(defaultValue) << ")";
89  } else
90  {
91  return static_cast<TuningLevel>(v);
92  }
93  }
94  return defaultValue;
95 }
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205

◆ PermuteTensor()

armnn::ConstTensor PermuteTensor ( const ConstTensorHandle *  tensor,
const PermutationVector &  permutationVector,
void *  permuteBuffer 
)

Definition at line 18 of file WorkloadUtils.cpp.

References ARMNN_ASSERT_MSG, ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), GetDataTypeSize(), TensorInfo::GetNumBytes(), TensorInfo::GetShape(), PermutationVector::GetSize(), ConstTensorHandle::GetTensorInfo(), Permute, armnnUtils::Permuted(), and TensorInfo::SetConstant().

Referenced by Convert1HWOTensorToAcl(), Convert1HWOtoMIHW(), ConvertWeightTensorFromArmnnToAcl(), and GatherTensorHandlePairs().

20 {
21  ARMNN_ASSERT_MSG(tensor, "Invalid input tensor");
22  ARMNN_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");
23 
24  TensorInfo tensorInfo = tensor->GetTensorInfo();
25 
26  if (permutationVector.GetSize() > 0)
27  {
28  tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector);
29  armnnUtils::Permute(tensorInfo.GetShape(), permutationVector,
30  tensor->GetConstTensor<void>(), permuteBuffer,
31  GetDataTypeSize(tensorInfo.GetDataType()));
32  }
33  else
34  {
35  ::memcpy(permuteBuffer, tensor->GetConstTensor<void>(), tensorInfo.GetNumBytes());
36  }
37  tensorInfo.SetConstant(true);
38  return ConstTensor(tensorInfo, permuteBuffer);
39 }
void Permute(const armnn::TensorShape &dstShape, const armnn::PermutationVector &mappings, const void *src, void *dst, size_t dataTypeSize)
Definition: Permute.cpp:131
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:98
constexpr unsigned int GetDataTypeSize(DataType dataType)
Definition: TypesUtils.hpp:151

◆ PolymorphicDowncast()

DestType armnn::PolymorphicDowncast ( SourceType *  value)

Polymorphic downcast for built-in pointers only.

Usage: Child* pChild = PolymorphicDowncast<Child*>(pBase);

Template Parameters
    DestType      Pointer type to the target object (Child pointer type)
    SourceType    Pointer type to the source object (Base pointer type)
Parameters
    value         Pointer to the source object
Returns
    Pointer of type DestType (pointer of type Child)

Definition at line 74 of file PolymorphicDowncast.hpp.

References ARMNN_POLYMORPHIC_CAST_CHECK.

Referenced by ClLayerSupport::IsLayerSupported(), and NeonLayerSupport::IsLayerSupported().

75 {
76  static_assert(std::is_pointer<DestType>::value,
77  "PolymorphicDowncast only works with pointer types.");
78 
79  ARMNN_POLYMORPHIC_CAST_CHECK(dynamic_cast<DestType>(value) == value);
80  return static_cast<DestType>(value);
81 }
#define ARMNN_POLYMORPHIC_CAST_CHECK(cond)
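
An illustrative sketch using hypothetical Base/Child types (not part of Arm NN), mirroring the usage line above:

    struct Base  { virtual ~Base() = default; };
    struct Child : Base { void ChildOnly() {} };

    Base* base = new Child();
    Child* child = armnn::PolymorphicDowncast<Child*>(base); // checked with dynamic_cast when the cast check is enabled
    child->ChildOnly();
    delete base;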

◆ PolymorphicPointerDowncast()

auto armnn::PolymorphicPointerDowncast ( const SourceType &  value)

Polymorphic downcast for shared pointers and built-in pointers.

Usage: auto pChild = PolymorphicPointerDowncast<Child>(pBase)

Template Parameters
    DestType      Type of the target object (Child type)
    SourceType    Pointer type to the source object (Base (shared) pointer type)
Parameters
    value         Pointer to the source object
Returns
    Pointer of type DestType ((shared) pointer of type Child)

Definition at line 93 of file PolymorphicDowncast.hpp.

References ARMNN_POLYMORPHIC_CAST_CHECK.

94 {
95  ARMNN_POLYMORPHIC_CAST_CHECK(utility::DynamicPointerCast<DestType>(value)
96  == value);
97  return utility::StaticPointerCast<DestType>(value);
98 }
#define ARMNN_POLYMORPHIC_CAST_CHECK(cond)
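
The shared-pointer counterpart, using the same hypothetical Base/Child types as the previous sketch; note the template argument is the target type itself, not a pointer type:

    std::shared_ptr<Base> base = std::make_shared<Child>();
    std::shared_ptr<Child> child = armnn::PolymorphicPointerDowncast<Child>(base);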

◆ Pooling2d()

void Pooling2d ( Decoder< float > &  rInputDecoder,
Encoder< float > &  rOutputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const Pooling2dDescriptor &  params 
)

Computes the Pooling2d operation.

Definition at line 142 of file Pooling2d.cpp.

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Pooling2dDescriptor::m_DataLayout, Pooling2dDescriptor::m_PadBottom, Pooling2dDescriptor::m_PaddingMethod, Pooling2dDescriptor::m_PadLeft, Pooling2dDescriptor::m_PadRight, Pooling2dDescriptor::m_PadTop, Pooling2dDescriptor::m_PoolHeight, Pooling2dDescriptor::m_PoolType, Pooling2dDescriptor::m_PoolWidth, Pooling2dDescriptor::m_StrideX, Pooling2dDescriptor::m_StrideY, NHWC, numeric_cast(), Pooling2d(), and Encoder< IType >::Set().

Referenced by Pooling2d(), Pooling2dLayer::Pooling2dLayer(), and TEST_SUITE().

147 {
148  const DataLayoutIndexed dataLayout(params.m_DataLayout);
149  auto channelsIndex = dataLayout.GetChannelsIndex();
150  auto heightIndex = dataLayout.GetHeightIndex();
151  auto widthIndex = dataLayout.GetWidthIndex();
152 
153  const int batchSize = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
154  const int channels = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
155  const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
156  const int widthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
157  const int heightInput = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
158  const int widthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
159  const int padLeft = armnn::numeric_cast<int>(params.m_PadLeft);
160  const int padRight = armnn::numeric_cast<int>(params.m_PadRight);
161  const int padTop = armnn::numeric_cast<int>(params.m_PadTop);
162  const int padBottom = armnn::numeric_cast<int>(params.m_PadBottom);
163  const int strideX = armnn::numeric_cast<int>(params.m_StrideX);
164  const int strideY = armnn::numeric_cast<int>(params.m_StrideY);
165  const int poolHeight = armnn::numeric_cast<int>(params.m_PoolHeight);
166  const int poolWidth = armnn::numeric_cast<int>(params.m_PoolWidth);
167 
168  float defaultInitializer = DefaultInitializer(params.m_PoolType);
169 
170  Accumulator accumulate = GetAccumulator(params.m_PoolType);
171  Executor execute = GetExecutor(params.m_PoolType);
172 
173  // Check supported padding methods outside the loop to simplify
174  // the inner loop.
175  if (params.m_PaddingMethod != PaddingMethod::Exclude &&
176  params.m_PaddingMethod != PaddingMethod::IgnoreValue)
177  {
178  throw armnn::InvalidArgumentException("Unsupported padding type");
179  }
180 
181  const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
182 
183  for (int n = 0; n < batchSize; n++)
184  {
185  for (int c = 0; c < channels; c++)
186  {
187  for (int yOutput = 0; yOutput < heightOutput; yOutput++)
188  {
189  // Calculate values independent of the x axis
190  int hstart = (yOutput * strideY) - padTop;
191  int hend = hstart + poolHeight;
192  // Clamp the pooling region inside the valid input area (which includes the padding).
193  // This is necessary because the final pooling in a row may overlap beyond the padding.
194  hend = std::min(hend, heightInput + padBottom);
195 
196  int height = hend - hstart;
197  bool hclamped = ClampRange(hstart, hend, heightInput);
198 
199  for (int xOutput = 0; xOutput < widthOutput; xOutput++)
200  {
201  int wstart = (xOutput * strideX) - padLeft;
202  int wend = wstart + poolWidth;
203 
204  // Clamp the pooling region inside the valid input area (which includes the padding).
205  // This is necessary because the final pooling in a row may overlap beyond the padding.
206  wend = std::min(wend, widthInput + padRight);
207 
208  float result = defaultInitializer;
209  float poolAreaSize = armnn::numeric_cast<float>(height * (wend - wstart));
210 
211  // Special case: when the pooling kernel is over a padding region and the padding
212  // size is larger or equal to the kernel and the kernel only covers
213  // padding and no real values, then we initialize the result as zero
214  // by convention. This is because we need to choose a value here and
215  // all values we have are padding, which we ignore.
216  if (OnPaddingOnly(hstart, hend, heightInput) ||
217  OnPaddingOnly(wstart, wend, widthInput))
218  {
219  result = 0.0f;
220 
221  int outputIndex;
222 
223  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
224  {
225  outputIndex = n * heightOutput * widthOutput * channels +
226  yOutput * widthOutput * channels +
227  xOutput * channels +
228  c;
229  }
230  else
231  {
232  outputIndex = n * heightOutput * widthOutput * channels +
233  c * heightOutput * widthOutput +
234  yOutput * widthOutput +
235  xOutput;
236  }
237 
238  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
239  rOutputEncoder.Set(result);
240  continue;
241  }
242 
243  bool clamped = hclamped |= ClampRange(wstart, wend, widthInput);
244 
245  if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
246  {
247  // When we exclude the padding, it means we calculate with a smaller
248  // kernel size, so I changed the divisor here.
249  poolAreaSize = armnn::numeric_cast<float>((hend - hstart) * (wend - wstart));
250  }
251 
252  for (auto yInput = hstart; yInput < hend; yInput++)
253  {
254  for (auto xInput = wstart; xInput < wend; xInput++)
255  {
256 
257  int inputIndex;
258  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
259  {
260  inputIndex = n * heightInput * widthInput * channels +
261  yInput * widthInput * channels +
262  xInput * channels +
263  c;
264 
265  }
266  else
267  {
268  inputIndex = n * heightInput * widthInput * channels +
269  c * heightInput * widthInput +
270  yInput * widthInput +
271  xInput;
272  }
273 
274  accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
275  }
276  }
277 
278  execute(result, poolAreaSize);
279 
280  int outputIndex;
281 
282  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
283  {
284  outputIndex = n * heightOutput * widthOutput * channels +
285  yOutput * widthOutput * channels +
286  xOutput * channels +
287  c;
288  }
289  else
290  {
291  outputIndex = n * heightOutput * widthOutput * channels +
292  c * heightOutput * widthOutput +
293  yOutput * widthOutput +
294  xOutput;
295  }
296 
297  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
298  rOutputEncoder.Set(result);
299  }
300  }
301  }
302  }
303 }
uint32_t m_PadBottom
Padding bottom value in the height dimension.
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_PoolWidth
Pooling width value.
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
PaddingMethod m_PaddingMethod
The padding method to be used. (Exclude, IgnoreValue).
uint32_t m_PadTop
Padding top value in the height dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_PoolHeight
Pooling height value.
uint32_t m_PadRight
Padding right value in the width dimension.
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max, Average, L2).
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
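
The Decoder/Encoder arguments are supplied by the reference backend's workload, so a full call is not reproduced here; the sketch below only shows how a Pooling2dDescriptor consumed by this kernel might be configured (a 2x2 max pool with stride 2 over NHWC data, values chosen for illustration):

    armnn::Pooling2dDescriptor desc;
    desc.m_PoolType      = armnn::PoolingAlgorithm::Max;
    desc.m_PoolWidth     = 2;
    desc.m_PoolHeight    = 2;
    desc.m_StrideX       = 2;
    desc.m_StrideY       = 2;
    desc.m_PaddingMethod = armnn::PaddingMethod::Exclude;
    desc.m_DataLayout    = armnn::DataLayout::NHWC;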

◆ Pooling3d()

void Pooling3d ( Decoder< float > &  rInputDecoder,
Encoder< float > &  rOutputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const Pooling3dDescriptor &  params 
)

Computes the Pooling3d operation.

Definition at line 172 of file Pooling3d.cpp.

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDepthIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Pooling3dDescriptor::m_DataLayout, Pooling3dDescriptor::m_PadBack, Pooling3dDescriptor::m_PadBottom, Pooling3dDescriptor::m_PaddingMethod, Pooling3dDescriptor::m_PadFront, Pooling3dDescriptor::m_PadLeft, Pooling3dDescriptor::m_PadRight, Pooling3dDescriptor::m_PadTop, Pooling3dDescriptor::m_PoolDepth, Pooling3dDescriptor::m_PoolHeight, Pooling3dDescriptor::m_PoolType, Pooling3dDescriptor::m_PoolWidth, Pooling3dDescriptor::m_StrideX, Pooling3dDescriptor::m_StrideY, Pooling3dDescriptor::m_StrideZ, numeric_cast(), Pooling3d(), and Encoder< IType >::Set().

Referenced by Pooling3d(), and Pooling3dLayer::Pooling3dLayer().

177 {
178  const DataLayoutIndexed dataLayout(params.m_DataLayout);
179 
180  auto channelsIndex = dataLayout.GetChannelsIndex();
181 
182  auto depthIndex = dataLayout.GetDepthIndex();
183  auto heightIndex = dataLayout.GetHeightIndex();
184  auto widthIndex = dataLayout.GetWidthIndex();
185 
186  const int batchSize = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
187  const int channels = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
188 
189  const int depthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[depthIndex]);
190  const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
191  const int widthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
192 
193  const int depthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[depthIndex]);
194  const int heightInput = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
195  const int widthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
196 
197  const int padLeft = armnn::numeric_cast<int>(params.m_PadLeft);
198  const int padRight = armnn::numeric_cast<int>(params.m_PadRight);
199  const int padTop = armnn::numeric_cast<int>(params.m_PadTop);
200  const int padBottom = armnn::numeric_cast<int>(params.m_PadBottom);
201  const int padFront = armnn::numeric_cast<int>(params.m_PadFront);
202  const int padBack = armnn::numeric_cast<int>(params.m_PadBack);
203 
204  const int strideX = armnn::numeric_cast<int>(params.m_StrideX);
205  const int strideY = armnn::numeric_cast<int>(params.m_StrideY);
206  const int strideZ = armnn::numeric_cast<int>(params.m_StrideZ);
207 
208  const int poolHeight = armnn::numeric_cast<int>(params.m_PoolHeight);
209  const int poolWidth = armnn::numeric_cast<int>(params.m_PoolWidth);
210  const int poolDepth = armnn::numeric_cast<int>(params.m_PoolDepth);
211 
212  float defaultInitializer = DefaultInitializer(params.m_PoolType);
213  Accumulator accumulate = GetAccumulator(params.m_PoolType);
214  Executor execute = GetExecutor(params.m_PoolType);
215 
216  // Check supported padding methods outside the loop to simplify
217  // the inner loop.
218  if (params.m_PaddingMethod != PaddingMethod::Exclude &&
219  params.m_PaddingMethod != PaddingMethod::IgnoreValue)
220  {
221  throw armnn::InvalidArgumentException("Unsupported padding type");
222  }
223 
224  const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
225 
226  for (int n = 0; n < batchSize; n++)
227  {
228  for (int c = 0; c < channels; c++)
229  {
230  for (int zOutput = 0; zOutput < depthOutput; zOutput++)
231  {
232  // Calculate values independent of the x and y axis
233  int dstart = (zOutput * strideZ) - padFront;
234  int dend = dstart + poolDepth;
235  // Clamp the pooling region inside the valid input area (which includes the padding).
236  // This is necessary because the final pooling in a row may overlap beyond the padding.
237  dend = std::min(dend, depthInput + padBack);
238 
239  int depth = dend - dstart;
240  bool dclamped = ClampRange(dstart, dend, depthInput);
241  int depthClamped = dend - dstart;
242 
243  for (int yOutput = 0; yOutput < heightOutput; yOutput++)
244  {
245  int hstart = (yOutput * strideY) - padTop;
246  int hend = hstart + poolHeight;
247  // Clamp the pooling region inside the valid input area (which includes the padding).
248  // This is necessary because the final pooling in a row may overlap beyond the padding.
249  hend = std::min(hend, heightInput + padBottom);
250 
251  int height = hend - hstart;
252  bool hclamped = ClampRange(hstart, hend, heightInput);
253  int heightClamped = hend - hstart;
254 
255  for (int xOutput = 0; xOutput < widthOutput; xOutput++)
256  {
257  int wstart = (xOutput * strideX) - padLeft;
258  int wend = wstart + poolWidth;
259  // Clamp the pooling region inside the valid input area (which includes the padding).
260  // This is necessary because the final pooling in a row may overlap beyond the padding.
261  wend = std::min(wend, widthInput + padRight);
262 
263  int width = wend - wstart;
264  bool wclamped = ClampRange(wstart, wend, widthInput);
265  int widthClamped = wend - wstart;
266 
267  float result = defaultInitializer;
268  float poolAreaSize = armnn::numeric_cast<float>(depth * height * width);
269 
270  // Special case: when the pooling kernel is over a padding region and the padding
271  // size is larger or equal to the kernel and the kernel only covers
272  // padding and no real values, then we initialize the result as zero
273  // by convention. This is because we need to choose a value here and
274  // all values we have are padding, which we ignore.
275  if (OnPaddingOnly(dstart, dend, depthInput) ||
276  OnPaddingOnly(hstart, hend, heightInput) ||
277  OnPaddingOnly(wstart, wend, widthInput))
278  {
279  result = 0.0f;
280 
281  int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
282  n, c, zOutput, yOutput, xOutput, dataLayout);
283 
284  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
285  rOutputEncoder.Set(result);
286 
287  continue;
288  }
289 
290  bool clamped = (dclamped | hclamped | wclamped);
291 
292  if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
293  {
294  // When we exclude the padding, it means we calculate with a smaller
295  // kernel size, so I changed the divisor here.
296  poolAreaSize = armnn::numeric_cast<float>(depthClamped * heightClamped * widthClamped);
297  }
298 
299  for (auto zInput = dstart; zInput < dend; zInput++)
300  {
301  for (auto yInput = hstart; yInput < hend; yInput++)
302  {
303  for (auto xInput = wstart; xInput < wend; xInput++)
304  {
305 
306  int inputIndex = CalculateIndex(channels, depthInput, heightInput, widthInput,
307  n, c, zInput, yInput, xInput, dataLayout);
308 
309  accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
310  }
311  }
312  }
313 
314  execute(result, poolAreaSize);
315 
316  int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
317  n, c, zOutput, yOutput, xOutput, dataLayout);
318 
319  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
320  rOutputEncoder.Set(result);
321  }
322  }
323  }
324  }
325  }
326 }
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max, Average, L2).
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
uint32_t m_PoolWidth
Pooling width value.
uint32_t m_PoolDepth
Pooling depth value.
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
uint32_t m_PadRight
Padding right value in the width dimension.
DataLayout m_DataLayout
The data layout to be used (NCDHW, NDHWC).
uint32_t m_PadFront
Padding front value in the depth dimension.
uint32_t m_PoolHeight
Pooling height value.
uint32_t m_PadBack
Padding back value in the depth dimension.
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
uint32_t m_PadBottom
Padding bottom value in the height dimension.
uint32_t m_StrideZ
Stride value when proceeding through input for the depth dimension.
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_PadTop
Padding top value in the height dimension.
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
PaddingMethod m_PaddingMethod
The padding method to be used. (Exclude, IgnoreValue).

◆ PreluImpl()

void PreluImpl ( const TensorInfo &  inputInfo,
const TensorInfo &  alphaInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  inputData,
Decoder< float > &  alphaData,
Encoder< float > &  outputData 
)

Definition at line 13 of file PreluImpl.cpp.

References TensorInfo::GetShape(), and BroadcastLoop::Unroll().

Referenced by RefPreluWorkload::ExecuteAsync().

19 {
20  const TensorShape& inputShape = inputInfo.GetShape();
21  const TensorShape& alphaShape = alphaInfo.GetShape();
22  const TensorShape& outputShape = outputInfo.GetShape();
23 
24  // PReLU activation: f(x) = alpha * x for x < 0, f(x) = x for x >= 0
25  auto prelu = [](float x, float alpha)
26  {
27  return x < 0 ? alpha * x : x;
28  };
29 
30  BroadcastLoop(inputShape, alphaShape, outputShape).Unroll(prelu, 0, inputData, alphaData, outputData);
31 }

◆ ProfilingUpdateDescriptions()

void armnn::ProfilingUpdateDescriptions ( const std::string &  name,
const DescriptorType &  desc,
const WorkloadInfo &  infos,
const arm::pipe::ProfilingGuid  guid 
)
inline

Adds layer details to the profiler, if profiling is enabled.

Definition at line 180 of file Profiling.hpp.

References ProfilerManager::GetInstance(), and IProfiler::IsProfilingEnabled().

184 {
185  IProfiler* profiler(ProfilerManager::GetInstance().GetProfiler()); ///< Profiler used
186  if (profiler && profiler->IsProfilingEnabled())
187  {
188  profiler->AddLayerDetails(name, desc, infos, guid);
189  }
190 }

◆ Quantize() [1/2]

void armnn::Quantize ( uint8_t *  quant,
const float *  dequant,
const TensorInfo &  info 
)
inline

Definition at line 114 of file RefWorkloadUtils.hpp.

References TensorInfo::GetNumElements(), TensorInfo::GetQuantizationOffset(), and TensorInfo::GetQuantizationScale().

115 {
116  for (size_t i = 0; i < info.GetNumElements(); i++)
117  {
118  quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
119  }
120 }

◆ Quantize() [2/2]

template int32_t Quantize< int32_t > ( float  value,
float  scale,
int32_t  offset 
)

Quantize a floating point value into a quantized integer data type.

Explicit specialization of Quantize for int32_t.

Explicit specialization of Quantize for int16_t.

Explicit specialization of Quantize for uint8_t.

Explicit specialization of Quantize for int8_t.

Parameters
    value     The value to quantize.
    scale     The scale (must be non-zero).
    offset    The offset.
Returns
    The quantized value, calculated as round(value/scale)+offset.

Definition at line 30 of file TypesUtils.cpp.

References ARMNN_ASSERT.

Referenced by TEST_SUITE().

31 {
32  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
33  constexpr QuantizedType max = std::numeric_limits<QuantizedType>::max();
34  constexpr QuantizedType min = std::numeric_limits<QuantizedType>::lowest();
35  ARMNN_ASSERT(scale != 0.f);
36  ARMNN_ASSERT(!std::isnan(value));
37 
38  float clampedValue = std::min(std::max((static_cast<float>(offset) + static_cast<float>(round(value/scale))),
39  static_cast<float>(min)), static_cast<float>(max));
40  auto quantizedBits = static_cast<QuantizedType>(clampedValue);
41 
42  return quantizedBits;
43 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
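
A short sketch of the formula above; the results in the comments follow round(value/scale)+offset, clamped to the target type's range (values chosen for illustration):

    uint8_t q1 = armnn::Quantize<uint8_t>(0.5f,   0.25f, 10); // round(0.5 / 0.25) + 10 = 12
    uint8_t q2 = armnn::Quantize<uint8_t>(100.0f, 0.25f, 0);  // 400 clamps to 255, the uint8_t maximum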

◆ Reduce()

void Reduce ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  input,
Encoder< float > &  output,
const std::vector< uint32_t >  axis,
const ReduceOperation  reduceOperation 
)

Definition at line 70 of file Reduce.cpp.

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), Max, Mean, Min, NextIndex(), numeric_cast(), Prod, ReducedOutputOffset(), Encoder< IType >::Set(), and Sum.

76 {
77  armnn::TensorShape inputDims = inputInfo.GetShape();
78  unsigned int inputNumDims = inputInfo.GetNumDimensions();
79  unsigned int numOutputs = outputInfo.GetNumElements();
80 
81  // Initialise temp output
82  std::vector<float> tempOut(numOutputs);
83  switch(reduceOperation)
84  {
85  case ReduceOperation::Mean:
86  case ReduceOperation::Sum:
87  std::fill(tempOut.begin(), tempOut.end(), 0.0f);
88  break;
89  case ReduceOperation::Prod:
90  std::fill(tempOut.begin(), tempOut.end(), 1.0f);
91  break;
92  case ReduceOperation::Max:
93  std::fill(tempOut.begin(), tempOut.end(), -1 * std::numeric_limits<float>::max());
94  break;
95  case ReduceOperation::Min:
96  std::fill(tempOut.begin(), tempOut.end(), std::numeric_limits<float>::max());
97  break;
98  default:
99  throw armnn::InvalidArgumentException("Unknown reduce method: " +
100  std::to_string(static_cast<int>(reduceOperation)));
101  }
102 
103  // Initialise temp index
104  std::vector<unsigned int> tempIndex(inputNumDims, 0);
105 
106  std::vector<unsigned int> resolvedAxis = axis;
107  if (resolvedAxis.empty())
108  {
109  for (unsigned int idx = 0; idx < inputNumDims; ++idx)
110  {
111  resolvedAxis.push_back(idx);
112  }
113  }
114  auto numResolvedAxis = armnn::numeric_cast<unsigned int>(resolvedAxis.size());
115 
116  // Iterates through input_data and operates over the reduced axis
117  for (bool hasNext = true; hasNext; hasNext = NextIndex(inputNumDims, inputDims, tempIndex))
118  {
119  unsigned int inputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, 0, {});
120  unsigned int outputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex,
121  numResolvedAxis, resolvedAxis);
122  input[inputOffset];
123  auto inputValue = input.Get();
124  switch(reduceOperation)
125  {
126  case ReduceOperation::Mean:
127  case ReduceOperation::Sum:
128  tempOut[outputOffset] += inputValue;
129  break;
130  case ReduceOperation::Prod:
131  tempOut[outputOffset] *= inputValue;
132  break;
133  case ReduceOperation::Max:
134  if (inputValue > tempOut[outputOffset])
135  {
136  tempOut[outputOffset] = inputValue;
137  }
138  break;
139  case ReduceOperation::Min:
140  if (inputValue < tempOut[outputOffset])
141  {
142  tempOut[outputOffset] = inputValue;
143  }
144  break;
145  default:
146  throw armnn::InvalidArgumentException("Unknown reduce method: " +
147  std::to_string(static_cast<int>(reduceOperation)));
148  }
149  }
150 
151  // Takes average by num of elements added to get MEAN
152  size_t numElementsInAxis = 1;
153  for (unsigned int idx = 0; idx < numResolvedAxis; ++idx)
154  {
155  unsigned int current = inputDims[resolvedAxis[idx]];
156  ARMNN_ASSERT(armnn::numeric_cast<float>(current) <
157  (std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis)));
158  numElementsInAxis *= current;
159  }
160 
161  for (unsigned int idx = 0; idx < numOutputs; ++idx)
162  {
163  output[idx];
164  if (reduceOperation == ReduceOperation::Mean)
165  {
166  if (numElementsInAxis > 0)
167  {
168  output.Set(tempOut[idx] / armnn::numeric_cast<float>(numElementsInAxis));
169  }
170  }
171  else
172  {
173  output.Set(tempOut[idx]);
174  }
175  }
176 }
bool NextIndex(const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &current)
Definition: Reduce.cpp:19
unsigned int ReducedOutputOffset(const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &index, const unsigned int numAxis, const std::vector< unsigned int > &axis)
Definition: Reduce.cpp:40
virtual void Set(IType right)=0
virtual IType Get() const =0
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
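
As with the other reference kernels, the Decoder/Encoder arguments come from the workload; the sketch below only shows a ReduceDescriptor a ReduceLayer might carry for a mean reduction over axis 1 (field names from the public descriptor, values illustrative):

    armnn::ReduceDescriptor desc;
    desc.m_ReduceOperation = armnn::ReduceOperation::Mean;
    desc.m_vAxis           = { 1 };  // reduce over the second dimension
    desc.m_KeepDims        = true;   // keep the reduced dimension with size 1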

◆ ReducedOutputOffset()

unsigned int armnn::ReducedOutputOffset ( const unsigned int  numDims,
const armnn::TensorShape &  dims,
std::vector< unsigned int > &  index,
const unsigned int  numAxis,
const std::vector< unsigned int > &  axis 
)

Definition at line 40 of file Reduce.cpp.

Referenced by Reduce().

45 {
46  unsigned int offset = 0;
47  for (unsigned int idx = 0; idx < numDims; ++idx)
48  {
49  bool isAxis = false;
50  if (!axis.empty())
51  {
52  for (unsigned int axisIdx = 0; axisIdx < numAxis; ++axisIdx)
53  {
54  if (idx == axis[axisIdx])
55  {
56  isAxis = true;
57  break;
58  }
59  }
60  }
61  if (!isAxis)
62  {
63  offset = offset * dims[idx] + index[idx];
64  }
65  }
66  return offset;
67 }

◆ RefBackendId()

constexpr const char* armnn::RefBackendId ( )

Definition at line 10 of file RefBackendId.hpp.

Referenced by RefBackend::GetIdStatic().

10 { return "CpuRef"; }

◆ RefTensorHandleFactoryId()

constexpr const char* armnn::RefTensorHandleFactoryId ( )

Definition at line 15 of file RefTensorHandleFactory.hpp.

Referenced by RefTensorHandleFactory::GetIdStatic().

15 { return "Arm/Ref/TensorHandleFactory"; }

◆ ReorderWeightChannelsForAcl()

ConstTensor armnn::ReorderWeightChannelsForAcl ( const ConstTensor &  weightHandle,
DataLayout  dataLayout,
void *  permuteBuffer 
)

Definition at line 67 of file WorkloadUtils.cpp.

References BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetNumBytes(), BaseTensor< MemoryType >::GetShape(), NCHW, and NHWC.

68 {
69  DataType* weight = static_cast<DataType*>(permuteBuffer);
70  const TensorShape& weightShape = weightHandle.GetShape();
71  unsigned int multiplier;
72  unsigned int height;
73  unsigned int width;
74  unsigned int inputChannels;
75  switch (dataLayout)
76  {
77  case DataLayout::NHWC: //It actually is [ H, W, I, M ]
78  height = weightShape[0];
79  width = weightShape[1];
80  inputChannels = weightShape[2];
81  multiplier = weightShape[3];
82  break;
83  case DataLayout::NCHW: //It actually is [ M, I, H, W ]
84  default:
85  height = weightShape[2];
86  width = weightShape[3];
87  inputChannels = weightShape[1];
88  multiplier = weightShape[0];
89  break;
90  }
91 
92  std::vector<DataType> weightAclOrder(height*width*inputChannels*multiplier);
93  unsigned int destinationWeightsChannel;
94  unsigned int totalChannels = inputChannels * multiplier;
95  unsigned int channelSize = height * width;
96  unsigned int inputChannel = 0;
97 
98  for (unsigned int originWeightsChannel = 0; originWeightsChannel < totalChannels; originWeightsChannel++)
99  {
100  inputChannel = originWeightsChannel % inputChannels;
101  destinationWeightsChannel = (originWeightsChannel - inputChannel) / inputChannels + multiplier * inputChannel;
102 
103  for (unsigned int i = 0; i < channelSize; i++)
104  {
105  weightAclOrder[i + destinationWeightsChannel * channelSize] =
106  weight[i + originWeightsChannel * channelSize];
107  }
108  }
109 
110  ::memcpy(permuteBuffer, weightAclOrder.data(), weightHandle.GetInfo().GetNumBytes());
111  return ConstTensor(weightHandle.GetInfo(), permuteBuffer);
112 }
DataType
Definition: Types.hpp:48

◆ ReplaceLayers()

void armnn::ReplaceLayers ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
std::vector< IConnectableLayer *> &  layers 
)

Definition at line 364 of file ArmComputeSubgraphUtils.hpp.

References OptimizationViews::AddSubstitution().

367 {
368  std::list<IConnectableLayer*> replacementLayers(layers.begin(), layers.end());
369 
370  SubgraphView substitutionSubgraph(baseLayer);
371  SubgraphView replacementSubgraph(std::move(replacementLayers),
372  CreateIInputsFrom({replacementLayers.front()}),
373  CreateIOutputsFrom({replacementLayers.back()}));
374 
375  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
376 }

◆ ReportError()

void armnn::ReportError ( const std::string &  errorMessage,
Optional< std::vector< std::string > &>  errorMessages 
)

Definition at line 556 of file Network.cpp.

References ARMNN_LOG, and warning.

Referenced by AssignBackends(), CheckScaleSetOnQuantizedType(), Optimize(), and ReturnWithError().

558 {
559  std::stringstream fullErrorMessage;
560  fullErrorMessage << "ERROR: " << errorMessage;
561  ARMNN_LOG(warning) << fullErrorMessage.str();
562  if (errorMessages)
563  {
564  errorMessages.value().push_back(fullErrorMessage.str());
565  }
566 }
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205

◆ ReportUntouchedLayers()

void armnn::ReportUntouchedLayers ( OptimizationViews &  optimizationViews,
std::map< LayerGuid, Layer *>  untouched 
)
inline

Definition at line 82 of file ArmComputeSubgraphUtils.hpp.

References OptimizationViews::AddUntouchedSubgraph().

Referenced by NeonBackend::OptimizeSubgraphView(), and ClBackend::OptimizeSubgraphView().

83 {
84  std::vector<Layer*> untouchedVector;
85  for (const auto& pair : untouched)
86  {
87  Layer* layer = pair.second;
88  SubgraphView subgraphView({layer},
89  CreateIInputsFrom({layer}),
90  CreateIOutputsFrom({layer}));
91  optimizationViews.AddUntouchedSubgraph(std::move(subgraphView));
92  }
93 }

◆ ReportWarning()

void armnn::ReportWarning ( const std::string &  warningMessage,
Optional< std::vector< std::string > &>  warningMessages 
)

Definition at line 568 of file Network.cpp.

References ARMNN_LOG, and warning.

Referenced by ApplyBackendOptimizations(), and AttemptBackendAssignment().

570 {
571  std::stringstream fullWarningMessage;
572  fullWarningMessage << "WARNING: " << warningMessage;
573  ARMNN_LOG(warning) << fullWarningMessage.str();
574  if (warningMessages)
575  {
576  warningMessages.value().push_back(fullWarningMessage.str());
577  }
578 }
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205

◆ RequiresCopy()

bool armnn::RequiresCopy ( ITensorHandleFactory::FactoryId  src,
ITensorHandleFactory::FactoryId  dst,
TensorHandleFactoryRegistry &  registry 
)

Definition at line 1247 of file Network.cpp.

References ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), and ITensorHandleFactory::GetImportFlags().

Referenced by CalculateSlotOption().

1250 {
1251  if (src != dst)
1252  {
1253  ITensorHandleFactory* srcFactory = registry.GetFactory(src);
1254  ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
1255 
1256  if (srcFactory && dstFactory &&
1257  (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
1258  {
1259  return false;
1260  }
1261  return true;
1262  }
1263  return false;
1264 }

◆ ReshapeWeightsForAcl()

void ReshapeWeightsForAcl ( TensorInfo &  weightInfo,
DataLayout  dataLayout 
)

Definition at line 41 of file WorkloadUtils.cpp.

References TensorInfo::GetShape(), NCHW, NHWC, and TensorInfo::SetShape().

Referenced by ConvertWeightTensorFromArmnnToAcl(), ConvertWeightTensorInfoFromArmnnToAcl(), and GatherTensorHandlePairs().

42 {
43  // Reshape the weights in-place
44  const TensorShape& weightShape = weightInfo.GetShape();
45  switch (dataLayout)
46  {
47  case DataLayout::NHWC:
48  // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ]
49  weightInfo.SetShape({ 1,
50  weightShape[0],
51  weightShape[1],
52  weightShape[2] * weightShape[3] });
53  weightInfo.SetShape({ 1,
54  weightShape[0] * weightShape[1],
55  weightShape[2],
56  weightShape[3] });
57  break;
58  case DataLayout::NCHW:
59  default:
60  // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W, ]
61  weightInfo.SetShape({ 1, weightShape[0] * weightShape[1], weightShape[2], weightShape[3] });
62  break;
63  }
64 }
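
A small sketch of the in-place reshape for the NCHW branch (shape values chosen for illustration): depthwise weights of shape [ M, I, H, W ] = [ 2, 3, 5, 5 ] become [ 1, I * M, H, W ] = [ 1, 6, 5, 5 ].

    armnn::TensorInfo weightInfo({ 2, 3, 5, 5 }, armnn::DataType::Float32);
    armnn::ReshapeWeightsForAcl(weightInfo, armnn::DataLayout::NCHW);
    // weightInfo.GetShape() is now { 1, 6, 5, 5 }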

◆ Resize()

void Resize ( Decoder< float > &  in,
const TensorInfo &  inputInfo,
Encoder< float > &  out,
const TensorInfo &  outputInfo,
DataLayoutIndexed  dataLayout,
armnn::ResizeMethod  resizeMethod,
bool  alignCorners,
bool  halfPixelCenters 
)

Definition at line 65 of file Resize.cpp.

References ARMNN_ASSERT, Bilinear, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), NearestNeighbor, Resize(), and Encoder< IType >::Set().

Referenced by InferenceTestImage::GetSizeInBytes(), Resize(), ResizeLayer::ResizeLayer(), and TEST_SUITE().

73 {
74  // alignCorners and halfPixelCenters cannot both be true
75  ARMNN_ASSERT(!(alignCorners && halfPixelCenters));
76 
77  // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
78  // image is projected into the input image to figure out the interpolants and weights. Note that this
79  // will yield different results than if projecting the centre of output texels.
80 
81  const unsigned int batchSize = inputInfo.GetShape()[0];
82  const unsigned int channelCount = inputInfo.GetShape()[dataLayout.GetChannelsIndex()];
83 
84  const unsigned int inputHeight = inputInfo.GetShape()[dataLayout.GetHeightIndex()];
85  const unsigned int inputWidth = inputInfo.GetShape()[dataLayout.GetWidthIndex()];
86  const unsigned int outputHeight = outputInfo.GetShape()[dataLayout.GetHeightIndex()];
87  const unsigned int outputWidth = outputInfo.GetShape()[dataLayout.GetWidthIndex()];
88 
89  // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates
90  // in the input image.
91  const float scaleY = CalculateResizeScale(inputHeight, outputHeight, alignCorners);
92  const float scaleX = CalculateResizeScale(inputWidth, outputWidth, alignCorners);
93 
94  TensorShape inputShape = inputInfo.GetShape();
95  TensorShape outputShape = outputInfo.GetShape();
96 
97  for (unsigned int n = 0; n < batchSize; ++n)
98  {
99  for (unsigned int c = 0; c < channelCount; ++c)
100  {
101  for (unsigned int y = 0; y < outputHeight; ++y)
102  {
103  // Corresponding real-valued height coordinate in input image.
104  float iy = PixelScaler(y, scaleY, halfPixelCenters, resizeMethod);
105 
106  // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
107  const float fiy = (resizeMethod == armnn::ResizeMethod::NearestNeighbor && alignCorners) ?
108  roundf(iy) : floorf(iy);
109  // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
110  const unsigned int y0 = static_cast<unsigned int>(std::max(fiy, 0.0f));
111 
112  // Interpolation weight (range [0,1]).
113  const float yw = iy - fiy;
114 
115  for (unsigned int x = 0; x < outputWidth; ++x)
116  {
117  // Real-valued and discrete width coordinates in input image.
118  float ix = PixelScaler(x, scaleX, halfPixelCenters, resizeMethod);
119 
120  // Nearest Neighbour uses rounding to align to corners
121  const float fix = resizeMethod == armnn::ResizeMethod::NearestNeighbor && alignCorners ?
122  roundf(ix) : floorf(ix);
123  // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
124  const unsigned int x0 = static_cast<unsigned int>(std::max(fix, 0.0f));
125 
126  // Interpolation weight (range [0,1]).
127  const float xw = ix - fix;
128 
129  unsigned int x1;
130  unsigned int y1;
131  // Half Pixel Centers uses the scaling to compute a weighted parameter for nearby pixels
132  if (halfPixelCenters)
133  {
134  x1 = std::min(static_cast<unsigned int>(std::ceil(ix)), inputWidth - 1u);
135  y1 = std::min(static_cast<unsigned int>(std::ceil(iy)), inputHeight - 1u);
136  }
137  // Discrete width/height coordinates of texels below and to the right of (x0, y0).
138  else
139  {
140  x1 = std::min(x0 + 1, inputWidth - 1u);
141  y1 = std::min(y0 + 1, inputHeight - 1u);
142  }
143 
144  float interpolatedValue;
145  switch (resizeMethod)
146  {
147  case armnn::ResizeMethod::Bilinear:
148  {
149  in[dataLayout.GetIndex(inputShape, n, c, y0, x0)];
150  float input1 = in.Get();
151  in[dataLayout.GetIndex(inputShape, n, c, y0, x1)];
152  float input2 = in.Get();
153  in[dataLayout.GetIndex(inputShape, n, c, y1, x0)];
154  float input3 = in.Get();
155  in[dataLayout.GetIndex(inputShape, n, c, y1, x1)];
156  float input4 = in.Get();
157 
158  const float ly0 = Lerp(input1, input2, xw); // lerp along row y0.
159  const float ly1 = Lerp(input3, input4, xw); // lerp along row y1.
160  interpolatedValue = Lerp(ly0, ly1, yw);
161  break;
162  }
163  case armnn::ResizeMethod::NearestNeighbor:
164  {
165  // calculate euclidean distance to the 4 neighbours
166  auto distance00 = EuclideanDistance(fix, fiy, x0, y0);
167  auto distance01 = EuclideanDistance(fix, fiy, x0, y1);
168  auto distance10 = EuclideanDistance(fix, fiy, x1, y0);
169  auto distance11 = EuclideanDistance(fix, fiy, x1, y1);
170 
171  auto minimum = std::min( { distance00, distance01, distance10, distance11 } );
172 
173  unsigned int xNearest = 0;
174  unsigned int yNearest = 0;
175 
176  if (minimum == distance00)
177  {
178  xNearest = x0;
179  yNearest = y0;
180  }
181  else if (minimum == distance01)
182  {
183  xNearest = x0;
184  yNearest = y1;
185  }
186  else if (minimum == distance10)
187  {
188  xNearest = x1;
189  yNearest = y0;
190  }
191  else if (minimum == distance11)
192  {
193  xNearest = x1;
194  yNearest = y1;
195  }
196  else
197  {
198  throw armnn::InvalidArgumentException("Resize Nearest Neighbor failure");
199  }
200 
201  in[dataLayout.GetIndex(inputShape, n, c, yNearest, xNearest)];
202  interpolatedValue = in.Get();
203  break;
204  }
205  default:
206  throw armnn::InvalidArgumentException("Unknown resize method: " +
207  std::to_string(static_cast<int>(resizeMethod)));
208  }
209  out[dataLayout.GetIndex(outputShape, n, c, y, x)];
210  out.Set(interpolatedValue);
211  }
212  }
213  }
214  }
215 }
unsigned int GetWidthIndex() const
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
virtual void Set(IType right)=0
unsigned int GetHeightIndex() const
virtual IType Get() const =0
unsigned int GetIndex(const armnn::TensorShape &shape, unsigned int batchIndex, unsigned int channelIndex, unsigned int heightIndex, unsigned int widthIndex) const
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetChannelsIndex() const
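
The alignCorners and halfPixelCenters arguments mirror the corresponding ResizeDescriptor fields; a configuration sketch for a bilinear resize to 224x224 (values chosen for illustration):

    armnn::ResizeDescriptor desc;
    desc.m_Method           = armnn::ResizeMethod::Bilinear;
    desc.m_TargetWidth      = 224;
    desc.m_TargetHeight     = 224;
    desc.m_DataLayout       = armnn::DataLayout::NHWC;
    desc.m_AlignCorners     = false;   // cannot be combined with half-pixel centres
    desc.m_HalfPixelCenters = false;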

◆ ReturnWithError()

OptimizationResult armnn::ReturnWithError ( OptimizationResult  res,
const Layer *  layer,
const BackendSettings &  backendSettings,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 580 of file Network.cpp.

References GetLayerTypeAsCString(), Layer::GetType(), OptimizationResult::m_Error, BackendSettings::m_PreferredBackends, and ReportError().

Referenced by AssignBackendsIConnectable(), and AttemptBackendAssignment().

584 {
585  std::stringstream failureMsg;
586  failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
587  << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
588  ReportError(failureMsg.str(), errMessages);
589 
590  res.m_Error = true;
591  return res;
592 }
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
const char * GetLayerTypeAsCString(LayerType type)

◆ RunClFunction()

void armnn::RunClFunction ( arm_compute::IFunction &  function,
const CheckLocation &  location 
)
inline

Definition at line 155 of file ClWorkloadUtils.hpp.

References Error, error, and WrapClError().

Referenced by ClFillWorkload::Execute(), ClPadWorkload::Execute(), ClAdditionWorkload::Execute(), ClSubtractionWorkload::Execute(), ClActivationWorkload::Execute(), ClExpWorkload::Execute(), ClPreluWorkload::Execute(), ClQuantizeWorkload::Execute(), ClConvertFp16ToFp32Workload::Execute(), ClRsqrtWorkload::Execute(), ClSinWorkload::Execute(), ClConvertFp32ToFp16Workload::Execute(), ClAbsWorkload::Execute(), ClLogWorkload::Execute(), ClSqrtWorkload::Execute(), ClLstmFloatWorkload::Execute(), ClCastWorkload::Execute(), ClNegWorkload::Execute(), ClSpaceToDepthWorkload::Execute(), ClNormalizationFloatWorkload::Execute(), ClFloorFloatWorkload::Execute(), ClResizeWorkload::Execute(), ClReshapeWorkload::Execute(), ClGatherWorkload::Execute(), ClInstanceNormalizationWorkload::Execute(), ClBatchToSpaceNdWorkload::Execute(), ClMaximumWorkload::Execute(), ClMinimumWorkload::Execute(), ClArgMinMaxWorkload::Execute(), ClChannelShuffleWorkload::Execute(), ClComparisonWorkload::Execute(), ClSliceWorkload::Execute(), ClL2NormalizationFloatWorkload::Execute(), ClDepthToSpaceWorkload::Execute(), ClDivisionWorkload::Execute(), ClPooling2dWorkload::Execute(), ClStridedSliceWorkload::Execute(), ClGatherNdWorkload::Execute(), ClSpaceToBatchNdWorkload::Execute(), ClPooling3dWorkload::Execute(), ClMultiplicationWorkload::Execute(), ClLogSoftmaxWorkload::Execute(), ClQuantizedLstmWorkload::Execute(), ClSoftmaxWorkload::Execute(), ClBatchNormalizationFloatWorkload::Execute(), ClDepthwiseConvolutionWorkload::Execute(), ClFullyConnectedWorkload::Execute(), ClConvolution3dWorkload::Execute(), ClTransposeWorkload::Execute(), ClTransposeConvolution2dWorkload::Execute(), ClPermuteWorkload::Execute(), and ClConvolution2dWorkload::Execute().

156 {
157  try
158  {
159  function.run();
160  }
161  catch (cl::Error& error)
162  {
163  throw WrapClError(error, location);
164  }
165 }
RuntimeException WrapClError(const cl::Error &clError, const CheckLocation &location)

◆ RuntimeLoadedNetworksReserve()

void RuntimeLoadedNetworksReserve ( armnn::RuntimeImpl *  runtime)

Definition at line 36 of file RuntimeTests.cpp.

Referenced by TEST_SUITE().

37 {
38  runtime->m_LoadedNetworks.reserve(1);
39 }

◆ SelectTensorHandleStrategy()

OptimizationResult SelectTensorHandleStrategy ( Graph &  optGraph,
BackendsMap &  backends,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1601 of file Network.cpp.

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, CalculateEdgeStrategy(), CalculateSlotOption(), CalculateSlotOptionForInput(), CalculateSlotOptionForOutput(), Graph::ForEachLayer(), Layer::GetBackendId(), OutputSlot::GetConnections(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), Layer::GetType(), Input, ITensorHandleFactory::LegacyFactoryId, OptimizationResult::m_Error, Output, OutputSlot::SetEdgeStrategy(), OutputSlot::SetTensorHandleFactory(), and Undefined.

Referenced by Optimize(), and TEST_SUITE().

1606 {
1607  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_SelectTensorHandleStrategy");
1608  OptimizationResult result;
1609 
1610  optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled](Layer* layer)
1611  {
1612  ARMNN_ASSERT(layer);
1613 
1614  // Lets make sure the backend is in our list of supported backends. Something went wrong during backend
1615  // assignment if this check fails
1616  ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());
1617 
1618  // Check each output separately
1619  for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
1620  {
1621  OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
1622 
1623  ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
1624 
1625  // Calculate the factory to use which results in the fewest copies being made.
1626  switch(layer->GetType())
1627  {
1628  case LayerType::Input:
1629  slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled);
1630  break;
1631  case LayerType::Output:
1632  slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
1633  break;
1634  default:
1635  slotOption = CalculateSlotOption(backends, outputSlot, registry, importEnabled);
1636  break;
1637  }
1638  outputSlot.SetTensorHandleFactory(slotOption);
1639 
1640  // Now determine the "best" edge strategy for each connection given the slotOption.
1641  unsigned int connectionIdx = 0;
1642  for (auto&& connection : outputSlot.GetConnections())
1643  {
1644  const Layer& connectedLayer = connection->GetOwningLayer();
1645 
1646  EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
1647  registry, importEnabled);
1648 
1649  if (strategy == EdgeStrategy::Undefined)
1650  {
1651  result.m_Error = true;
1652  if (errMessages)
1653  {
1654  errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
1655  " between backends.");
1656  }
1657  return;
1658  }
1659 
1660  outputSlot.SetEdgeStrategy(connectionIdx, strategy);
1661 
1662  connectionIdx++;
1663  }
1664  }
1665  });
1666 
1667  return result;
1668 }
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry)
Definition: Network.cpp:1352
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
ITensorHandleFactory::FactoryId FactoryId
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap &backends, OutputSlot &outputSlot, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition: Network.cpp:1362
EdgeStrategy CalculateEdgeStrategy(BackendsMap &backends, ITensorHandleFactory::FactoryId srcFactoryId, const Layer &layer, const Layer &connectedLayer, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition: Network.cpp:1512
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition: Network.cpp:1267

◆ SetAllLoggingSinks()

void SetAllLoggingSinks ( bool  standardOut,
bool  debugOut,
bool  coloured 
)

Definition at line 191 of file Logging.cpp.

Referenced by SimpleLogger< Level >::AddSink(), ConfigureLogging(), main(), and TEST_SUITE().

192 {
193  SetLoggingSinks<LogSeverity::Trace>(standardOut, debugOut, coloured);
194  SetLoggingSinks<LogSeverity::Debug>(standardOut, debugOut, coloured);
195  SetLoggingSinks<LogSeverity::Info>(standardOut, debugOut, coloured);
196  SetLoggingSinks<LogSeverity::Warning>(standardOut, debugOut, coloured);
197  SetLoggingSinks<LogSeverity::Error>(standardOut, debugOut, coloured);
198  SetLoggingSinks<LogSeverity::Fatal>(standardOut, debugOut, coloured);
199 }
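
In applications these sink and filter helpers are normally driven through armnn::ConfigureLogging rather than called directly; a minimal sketch:

    // Log warnings and above to standard output, without the debug output sink.
    armnn::ConfigureLogging(true /*standardOut*/, false /*debugOut*/, armnn::LogSeverity::Warning);
    ARMNN_LOG(warning) << "This message passes the Warning filter";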

◆ SetClSliceData()

auto armnn::SetClSliceData ( const std::vector< unsigned int > &  m_begin,
const std::vector< unsigned int > &  m_size 
)
inline

Definition at line 91 of file ClWorkloadUtils.hpp.

Referenced by ClSliceWorkload::ClSliceWorkload().

93 {
94  // This function must translate the size vector given to an end vector
95  // expected by the ACL NESlice workload
 96  arm_compute::Coordinates starts;
 97  arm_compute::Coordinates ends;
 98 
99  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
100 
101  // For strided slices, we have the relationship size = (end - begin) / stride
102  // For slice, we assume stride to be a vector of all ones, yielding the formula
103  // size = (end - begin) therefore we know end = size + begin
104  for (unsigned int i = 0; i < num_dims; i++)
105  {
106  unsigned int revertedIndex = num_dims - i - 1;
107 
108  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
109  ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
110  }
111 
112  return std::make_tuple(starts, ends);
113 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates

◆ SetClStridedSliceData()

auto armnn::SetClStridedSliceData ( const std::vector< int > &  m_begin,
const std::vector< int > &  m_end,
const std::vector< int > &  m_stride 
)
inline

Definition at line 70 of file ClWorkloadUtils.hpp.

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload().

73 {
 74  arm_compute::Coordinates starts;
 75  arm_compute::Coordinates ends;
 76  arm_compute::Coordinates strides;
 77 
78  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
79 
80  for (unsigned int i = 0; i < num_dims; i++) {
81  unsigned int revertedIndex = num_dims - i - 1;
82 
83  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
84  ends.set(i, static_cast<int>(m_end[revertedIndex]));
85  strides.set(i, static_cast<int>(m_stride[revertedIndex]));
86  }
87 
88  return std::make_tuple(starts, ends, strides);
89 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates

◆ SetLogFilter()

void SetLogFilter ( LogSeverity  level)

Definition at line 73 of file Logging.cpp.

References ARMNN_ASSERT, ARMNN_FALLTHROUGH, Debug, SimpleLogger< Level >::Enable(), Error, Fatal, SimpleLogger< Level >::Get(), IgnoreUnused(), Info, Trace, and Warning.

Referenced by SimpleLogger< Level >::AddSink(), ConfigureLogging(), main(), and TEST_SUITE().

74 {
75  SimpleLogger<LogSeverity::Trace>::Get().Enable(false);
76  SimpleLogger<LogSeverity::Debug>::Get().Enable(false);
77  SimpleLogger<LogSeverity::Info>::Get().Enable(false);
78  SimpleLogger<LogSeverity::Warning>::Get().Enable(false);
79  SimpleLogger<LogSeverity::Error>::Get().Enable(false);
80  SimpleLogger<LogSeverity::Fatal>::Get().Enable(false);
81  switch (level)
82  {
83  case LogSeverity::Trace:
 84  SimpleLogger<LogSeverity::Trace>::Get().Enable(true);
 85  ARMNN_FALLTHROUGH;
 86  case LogSeverity::Debug:
 87  SimpleLogger<LogSeverity::Debug>::Get().Enable(true);
 88  ARMNN_FALLTHROUGH;
 89  case LogSeverity::Info:
 90  SimpleLogger<LogSeverity::Info>::Get().Enable(true);
 91  ARMNN_FALLTHROUGH;
 92  case LogSeverity::Warning:
 93  SimpleLogger<LogSeverity::Warning>::Get().Enable(true);
 94  ARMNN_FALLTHROUGH;
 95  case LogSeverity::Error:
 96  SimpleLogger<LogSeverity::Error>::Get().Enable(true);
 97  ARMNN_FALLTHROUGH;
 98  case LogSeverity::Fatal:
 99  SimpleLogger<LogSeverity::Fatal>::Get().Enable(true);
100  break;
101  default:
102  ARMNN_ASSERT(false);
103  }
104 }
void Debug(const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
Definition: Debug.cpp:19
#define ARMNN_FALLTHROUGH
Definition: Utils.hpp:37
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ SetLoggingSinks()

void armnn::SetLoggingSinks ( bool  standardOut,
bool  debugOut,
bool  coloured 
)
inline

Definition at line 167 of file Logging.cpp.

References SimpleLogger< Level >::AddSink(), SimpleLogger< Level >::Get(), and SimpleLogger< Level >::RemoveAllSinks().

168 {
169  SimpleLogger<Level>::Get().RemoveAllSinks();
170 
171  if (standardOut)
172  {
173  if (coloured)
174  {
175  SimpleLogger<Level>::Get().AddSink(
176  std::make_shared<StandardOutputColourSink>(Level));
177  } else
178  {
179  SimpleLogger<Level>::Get().AddSink(
180  std::make_shared<StandardOutputSink>());
181  }
182  }
183 
184  if (debugOut)
185  {
186  SimpleLogger<Level>::Get().AddSink(
187  std::make_shared<DebugOutputSink>());
188  }
189 }

◆ SetNeonSliceData()

auto armnn::SetNeonSliceData ( const std::vector< unsigned int > &  m_begin,
const std::vector< unsigned int > &  m_size 
)
inline

Definition at line 113 of file NeonWorkloadUtils.hpp.

References GetOutputTensorData(), and ITensorHandle::Map().

Referenced by NeonSliceWorkload::NeonSliceWorkload().

115 {
116  // This function must translate the size vector given to an end vector
117  // expected by the ACL NESlice workload
118  arm_compute::Coordinates starts;
119  arm_compute::Coordinates ends;
120 
121  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
122 
123  // For strided slices, we have the relationship size = (end - begin) / stride
124  // For slice, we assume stride to be a vector of all ones, yielding the formula
125  // size = (end - begin) therefore we know end = size + begin
126  for (unsigned int i = 0; i < num_dims; i++)
127  {
128  unsigned int revertedIndex = num_dims - i - 1;
129 
130  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
131  ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
132  }
133 
134  return std::make_tuple(starts, ends);
135 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates

◆ SetNeonStridedSliceData()

auto armnn::SetNeonStridedSliceData ( const std::vector< int > &  m_begin,
const std::vector< int > &  m_end,
const std::vector< int > &  m_stride 
)
inline

Definition at line 91 of file NeonWorkloadUtils.hpp.

Referenced by NeonStridedSliceWorkload::NeonStridedSliceWorkload().

94 {
 95  arm_compute::Coordinates starts;
 96  arm_compute::Coordinates ends;
 97  arm_compute::Coordinates strides;
 98 
99  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
100 
101  for (unsigned int i = 0; i < num_dims; i++)
102  {
103  unsigned int revertedIndex = num_dims - i - 1;
104 
105  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
106  ends.set(i, static_cast<int>(m_end[revertedIndex]));
107  strides.set(i, static_cast<int>(m_stride[revertedIndex]));
108  }
109 
110  return std::make_tuple(starts, ends, strides);
111 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates

◆ SetValueChecked()

◆ Slice()

void Slice ( const TensorInfo &  inputInfo,
const SliceDescriptor &  descriptor,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 14 of file Slice.cpp.

References ARMNN_ASSERT, TensorShape::GetNumDimensions(), TensorInfo::GetShape(), IgnoreUnused(), SliceDescriptor::m_Begin, and SliceDescriptor::m_Size.

Referenced by TEST_SUITE().

19 {
20  const TensorShape& inputShape = inputInfo.GetShape();
21  const unsigned int numDims = inputShape.GetNumDimensions();
22 
23  ARMNN_ASSERT(descriptor.m_Begin.size() == numDims);
24  ARMNN_ASSERT(descriptor.m_Size.size() == numDims);
25 
26  constexpr unsigned int maxNumDims = 4;
27  ARMNN_ASSERT(numDims <= maxNumDims);
28 
29  std::vector<unsigned int> paddedInput(4);
30  std::vector<unsigned int> paddedBegin(4);
31  std::vector<unsigned int> paddedSize (4);
32 
33  const unsigned int numPaddingDims = maxNumDims - numDims;
34  for (unsigned int i = 0u; i < maxNumDims; ++i)
35  {
36  if (i < numPaddingDims)
37  {
38  paddedInput[i] = 1u;
39  paddedBegin[i] = 0u;
40  paddedSize[i] = 1u;
41  }
42  else
43  {
44  const unsigned int j = i - numPaddingDims;
45  paddedInput[i] = inputShape[j];
46  paddedBegin[i] = descriptor.m_Begin[j];
47  paddedSize[i] = descriptor.m_Size[j];
48  }
49  }
50 
51  unsigned int dim0 = paddedInput[0];
52  unsigned int dim1 = paddedInput[1];
53  unsigned int dim2 = paddedInput[2];
54  unsigned int dim3 = paddedInput[3];
55 
56  unsigned int begin0 = paddedBegin[0];
57  unsigned int begin1 = paddedBegin[1];
58  unsigned int begin2 = paddedBegin[2];
59  unsigned int begin3 = paddedBegin[3];
60 
61  unsigned int size0 = paddedSize[0];
62  unsigned int size1 = paddedSize[1];
63  unsigned int size2 = paddedSize[2];
64  unsigned int size3 = paddedSize[3];
65 
66  ARMNN_ASSERT(begin0 + size0 <= dim0);
67  ARMNN_ASSERT(begin1 + size1 <= dim1);
68  ARMNN_ASSERT(begin2 + size2 <= dim2);
69  ARMNN_ASSERT(begin3 + size3 <= dim3);
70 
71  const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
72  unsigned char* output = reinterpret_cast<unsigned char*>(outputData);
73 
74  IgnoreUnused(dim0);
75  for (unsigned int idx0 = begin0; idx0 < begin0 + size0; ++idx0)
76  {
77  for (unsigned int idx1 = begin1; idx1 < begin1 + size1; ++idx1)
78  {
79  for (unsigned int idx2 = begin2; idx2 < begin2 + size2; ++idx2)
80  {
81  for (unsigned int idx3 = begin3; idx3 < begin3 + size3; ++idx3)
82  {
83  const unsigned int inputOffset =
84  (((idx0 * dim1 + idx1) * dim2 + idx2) * dim3 + idx3) * dataTypeSize;
85 
86  ::memcpy(output, input + inputOffset, dataTypeSize);
87  output += dataTypeSize;
88  }
89  }
90  }
91  }
92 }
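
A minimal usage sketch (Slice is a reference-backend workload helper rather than public API, so its header path is assumed): slicing one row of two elements out of a 1x1x2x3 float tensor.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
// #include "Slice.hpp"   // reference backend workload header, path assumed
#include <vector>

void SliceExample()
{
    using namespace armnn;

    // 1x1x2x3 input laid out as { 1 2 3 / 4 5 6 }.
    const unsigned int inputDims[] = {1, 1, 2, 3};
    TensorInfo inputInfo(4, inputDims, DataType::Float32);

    SliceDescriptor desc;
    desc.m_Begin = {0, 0, 1, 1};   // start at row 1, column 1
    desc.m_Size  = {1, 1, 1, 2};   // take one row of two elements

    std::vector<float> input  = {1, 2, 3, 4, 5, 6};
    std::vector<float> output(2);                    // expected result: {5, 6}

    Slice(inputInfo, desc, input.data(), output.data(), sizeof(float));
}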

◆ Softmax()

void Softmax ( Decoder< float > &  in,
Encoder< float > &  out,
const TensorInfo inputTensorInfo,
float  beta,
int  axis 
)

Computes the softmax function over the inputs, writing the results to the outputs, with the shape given by tensorInfo.

Definition at line 17 of file Softmax.cpp.

References ARMNN_ASSERT_MSG, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

18 {
19  ARMNN_ASSERT_MSG(axis < static_cast<int>(inputTensorInfo.GetNumDimensions()),
20  "Required axis index greater than number of dimensions.");
21  ARMNN_ASSERT_MSG(axis >= -static_cast<int>(inputTensorInfo.GetNumDimensions()),
22  "Required axis index lower than negative of the number of dimensions");
23 
24  unsigned int uAxis = axis < 0 ?
25  inputTensorInfo.GetNumDimensions() - static_cast<unsigned int>(abs(axis))
26  : static_cast<unsigned int>(axis);
27 
28  const TensorShape& inputShape = inputTensorInfo.GetShape();
29  const unsigned int outerSize = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
30  const unsigned int axisSize = inputShape[uAxis];
31  const unsigned int innerSize = armnnUtils::GetNumElementsBetween(inputShape,
32  uAxis + 1,
33  inputShape.GetNumDimensions());
34 
35  for (unsigned int outer = 0; outer < outerSize; ++outer)
36  {
37  unsigned int inputBeginIdx = outer * axisSize * innerSize;
38  unsigned int inputEndIdx = inputBeginIdx + axisSize * innerSize;
39  unsigned int outputBeginIdx = outer * axisSize * innerSize;
40 
41  for (unsigned int inner = 0; inner < innerSize; ++inner, ++inputBeginIdx, ++inputEndIdx, ++outputBeginIdx)
42  {
43  // Find max
44  float maxValue = std::numeric_limits<float>::lowest();
45  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
46  {
47  in[iter];
48  maxValue = std::max(maxValue, in.Get());
49  }
50 
51  // Compute sum
52  float sum = 0.0f;
53  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
54  {
55  in[iter];
56  sum += std::exp((in.Get() - maxValue) * beta);
57  }
58 
59  // Compute result
60  unsigned int outputIter = outputBeginIdx;
61  out[outputIter];
62  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize, outputIter += innerSize)
63  {
64  out[outputIter];
65  in[iter];
66  out.Set(std::exp((in.Get() - maxValue) * beta) / sum);
67  }
68  }
69  }
70 }
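
The implementation is the numerically stable form: for each slice along the chosen axis, softmax(x)_i = exp((x_i - max(x)) * beta) / sum_j exp((x_j - max(x)) * beta). A standalone sketch of the same arithmetic on a single 3-element slice:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<float> x = {1.0f, 2.0f, 3.0f};
    const float beta = 1.0f;

    // Subtract the maximum before exponentiating, exactly as the loop above does.
    const float maxValue = *std::max_element(x.begin(), x.end());

    float sum = 0.0f;
    for (float v : x) { sum += std::exp((v - maxValue) * beta); }

    for (float v : x)
    {
        std::printf("%f ", std::exp((v - maxValue) * beta) / sum);  // ~0.090 0.245 0.665
    }
    std::printf("\n");
    return 0;
}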

◆ SpaceToBatchNd()

void SpaceToBatchNd ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const SpaceToBatchNdDescriptor params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 34 of file SpaceToBatchNd.cpp.

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToBatchNdDescriptor::m_BlockShape, SpaceToBatchNdDescriptor::m_DataLayout, SpaceToBatchNdDescriptor::m_PadList, Encoder< IType >::Set(), and SpaceToBatchNd().

Referenced by SpaceToBatchNd(), SpaceToBatchNdLayer::SpaceToBatchNdLayer(), and TEST_SUITE().

39 {
40  DataLayoutIndexed dataLayout = params.m_DataLayout;
41 
42  const TensorShape& inputShape = inputInfo.GetShape();
43  const TensorShape& outputShape = outputInfo.GetShape();
44 
45  const unsigned int channels = inputShape[dataLayout.GetChannelsIndex()];
46 
47  const unsigned int inputBatchSize = inputShape[0];
48  const unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
49  const unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
50 
51  const unsigned int outputBatchSize = outputShape[0];
52  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
53  const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
54 
55  const unsigned int blockHeight = params.m_BlockShape[0];
56  const unsigned int blockWidth = params.m_BlockShape[1];
57 
58  const unsigned int paddingTop = params.m_PadList[0].first;
59  const unsigned int paddingLeft = params.m_PadList[1].first;
60 
61  for (unsigned int outB = 0; outB < outputBatchSize; outB++)
62  {
63  unsigned int inB = outB % inputBatchSize;
64 
65  unsigned int shiftW = (outB / inputBatchSize) % blockWidth;
66  unsigned int shiftH = (outB / inputBatchSize) / blockWidth;
67 
68  for (unsigned int outH = 0; outH < outputHeight; outH++)
69  {
70  for (unsigned int outW = 0; outW < outputWidth; outW++)
71  {
72  if (outH * blockHeight + shiftH < paddingTop ||
73  outH * blockHeight + shiftH >= paddingTop + inputHeight ||
74  outW * blockWidth + shiftW < paddingLeft ||
75  outW * blockWidth + shiftW >= paddingLeft + inputWidth)
76  {
77  for (unsigned int c = 0; c < channels; c++)
78  {
79  unsigned int outOffset = GetOffset(outputShape,
80  outB,
81  outH,
82  outW,
83  c,
84  dataLayout);
85  outputData += outOffset;
86  outputData.Set(0);
87  outputData -= outOffset;
88  }
89  }
90  else
91  {
92  for (unsigned int c = 0; c < channels; c++)
93  {
94  unsigned int inOffset = GetOffset(inputShape,
95  inB,
96  (outH * blockHeight + shiftH) - paddingTop,
97  (outW * blockWidth + shiftW) - paddingLeft,
98  c,
99  dataLayout);
100 
101  unsigned int outOffset = GetOffset(outputShape,
102  outB,
103  outH,
104  outW,
105  c,
106  dataLayout);
107 
108  outputData += outOffset;
109  inputData += inOffset;
110  outputData.Set(inputData.Get());
111  inputData -= inOffset;
112  outputData -= outOffset;
113  }
114  }
115  }
116  }
117  }
118 }
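
A minimal descriptor sketch to make the shape effect concrete: an NHWC input of shape [1, 4, 4, 1] with a 2x2 block and no padding produces an output of shape [4, 2, 2, 1], since the batch grows by blockHeight * blockWidth while each spatial dimension shrinks by its block size.

#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>

armnn::SpaceToBatchNdDescriptor MakeSpaceToBatchNdDescriptor()
{
    armnn::SpaceToBatchNdDescriptor desc;
    desc.m_BlockShape = {2, 2};              // {blockHeight, blockWidth}
    desc.m_PadList    = {{0, 0}, {0, 0}};    // heightPad{top, bottom}, widthPad{left, right}
    desc.m_DataLayout = armnn::DataLayout::NHWC;
    return desc;
}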

◆ SpaceToDepth()

void SpaceToDepth ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const SpaceToDepthDescriptor params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 36 of file SpaceToDepth.cpp.

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, Encoder< IType >::Set(), and SpaceToDepth().

Referenced by SpaceToDepth(), SpaceToDepthLayer::SpaceToDepthLayer(), and TEST_SUITE().

41 {
42  DataLayoutIndexed dataLayout = params.m_DataLayout;
43 
44  const TensorShape& inputShape = inputInfo.GetShape();
45  const TensorShape& outputShape = outputInfo.GetShape();
46 
47  const unsigned int inputBatchSize = inputShape[0];
48  const unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
49 
50  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
51  const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
52  const unsigned int outputChannels = outputShape[dataLayout.GetChannelsIndex()];
53 
54  const unsigned int blockSize = params.m_BlockSize;
55 
56  if (blockSize == 0)
57  {
59  "Input shape must be divisible by block size in all spatial dimensions: Block size is"
60  " equal to zero");
61  }
62 
63  for (unsigned int outChannelIndex = 0; outChannelIndex < outputChannels; outChannelIndex++)
64  {
65  unsigned int inChannelIndex = outChannelIndex % inputChannels;
66 
67  unsigned int shiftW = (outChannelIndex / inputChannels) % blockSize;
68  unsigned int shiftH = (outChannelIndex / inputChannels) / blockSize;
69 
70  for (unsigned int outH = 0; outH < outputHeight; outH++)
71  {
72  for (unsigned int outW = 0; outW < outputWidth; outW++)
73  {
74  for (unsigned int inBatchIndex = 0; inBatchIndex < inputBatchSize; inBatchIndex++)
75  {
76  unsigned int inOffset = GetOffset(inputShape,
77  inChannelIndex,
78  (outH * blockSize + shiftH),
79  (outW * blockSize + shiftW),
80  inBatchIndex,
81  dataLayout);
82 
83  unsigned int outOffset = GetOffset(outputShape,
84  outChannelIndex,
85  outH,
86  outW,
87  inBatchIndex,
88  dataLayout);
89 
90  outputData += outOffset;
91  inputData += inOffset;
92  outputData.Set(inputData.Get());
93  inputData -= inOffset;
94  outputData -= outOffset;
95  }
96  }
97  }
98  }
99 }
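
A matching descriptor sketch: an NHWC input of shape [1, 4, 4, 1] with block size 2 yields an output of shape [1, 2, 2, 4], since each spatial dimension shrinks by the block size while the channel count grows by blockSize * blockSize.

#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>

armnn::SpaceToDepthDescriptor MakeSpaceToDepthDescriptor()
{
    armnn::SpaceToDepthDescriptor desc;
    desc.m_BlockSize  = 2;
    desc.m_DataLayout = armnn::DataLayout::NHWC;
    return desc;
}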

◆ Split()

void Split ( const SplitterQueueDescriptor data,
std::vector< ITensorHandle *>  inputs,
std::vector< ITensorHandle *>  outputs 
)

Definition at line 21 of file Splitter.cpp.

References ARMNN_ASSERT, Encoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, and MaxNumOfTensorDimensions.

Referenced by RefSplitterWorkload::ExecuteAsync(), and Splitter().

24 {
25  const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
26 
27  std::unique_ptr<Decoder<float>> decoderPtr =
28  MakeDecoder<float>(inputInfo, inputs[0]->Map());
29  Decoder<float>& decoder = *decoderPtr;
30 
31  for (unsigned int index = 0; index < inputInfo.GetNumElements(); ++index)
32  {
33  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
34 
35  unsigned int indexRemainder = index;
36  unsigned int dimensionStride = inputInfo.GetNumElements();
37 
38  for (unsigned int i = 0; i<inputInfo.GetNumDimensions(); i++)
39  {
40  dimensionStride /= inputInfo.GetShape()[i];
41  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
42  indexRemainder -= indices[i] * dimensionStride;
43  }
44 
45  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
46  {
47  SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
48 
49  //Split view extents are defined by the size of (the corresponding) input tensor.
50  const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
51  ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo.GetNumDimensions());
52 
53  // Check all dimensions to see if this element is inside the given input view.
54  bool insideView = true;
55  for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
56  {
57  if (indices[i] < view.m_Origin[i])
58  {
59  insideView = false;
60  }
61  if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
62  {
63  insideView = false;
64  }
65  }
66 
67  if (insideView)
68  {
69  std::unique_ptr<Encoder<float>> encoderPtr =
70  MakeEncoder<float>(outputInfo, outputs[viewIdx]->Map());
71  Encoder<float>& encoder = *encoderPtr;
72 
73  unsigned int outIndex = 0;
74  unsigned int dimensionStride = 1;
75  float inputValue = 0.f;
76 
77  for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
78  {
79  outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
80  dimensionStride *= outputInfo.GetShape()[i];
81  }
82 
83  decoder += index;
84  inputValue = decoder.Get();
85  decoder -= index;
86 
87  encoder += outIndex;
88  encoder.Set(inputValue);
89  break;
90  }
91  }
92  }
93 }
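
Each output view is described only by its origin inside the input; its extent comes from the corresponding output tensor's shape. A hedged sketch (header path and the ViewOrigin vector constructor assumed) of the view origins for splitting a [4, 2] tensor into two [2, 2] outputs along axis 0:

#include <armnn/backends/WorkloadData.hpp>   // path assumed

void DescribeSplit(armnn::SplitterQueueDescriptor& data)
{
    // First view starts at row 0, second at row 2; both span all columns.
    data.m_ViewOrigins.push_back(armnn::SplitterQueueDescriptor::ViewOrigin({0, 0}));
    data.m_ViewOrigins.push_back(armnn::SplitterQueueDescriptor::ViewOrigin({2, 0}));
}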

◆ Splitter()

void armnn::Splitter ( const SplitterQueueDescriptor data,
std::vector< ITensorHandle *>  inputs,
std::vector< ITensorHandle *>  outputs 
)

Definition at line 17 of file Splitter.hpp.

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, MaxNumOfTensorDimensions, and Split().

Referenced by TEST_SUITE().

20 {
21  const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
22 
23  for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
24  {
25  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
26 
27  unsigned int indexRemainder = index;
28  unsigned int dimensionStride = inputInfo0.GetNumElements();
29 
30  for (unsigned int i = 0; i<inputInfo0.GetNumDimensions(); i++)
31  {
32  dimensionStride /= inputInfo0.GetShape()[i];
33  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
34  indexRemainder -= indices[i] * dimensionStride;
35  }
36 
37  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
38  {
39  SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
40 
41  //Split view extents are defined by the size of (the corresponding) input tensor.
42  const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
43  ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
44 
45  // Check all dimensions to see if this element is inside the given input view.
46  bool insideView = true;
47  for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
48  {
49  if (indices[i] < view.m_Origin[i])
50  {
51  insideView = false;
52  }
53  if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
54  {
55  insideView = false;
56  }
57  }
58 
59  if (insideView)
60  {
61  unsigned int outIndex = 0;
62  unsigned int dimensionStride = 1;
63 
64  for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
65  {
66  outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
67  dimensionStride *= outputInfo.GetShape()[i];
68  }
69 
70  //We are within the view, to copy input data to the output corresponding to this view.
71  DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data);
72  ARMNN_ASSERT(outputData);
73 
74  const DataType* inputData = GetInputTensorData<DataType>(0, data);
75  ARMNN_ASSERT(inputData);
76 
77  outputData[outIndex] = inputData[index];
78  }
79  }
80  }
81 }

◆ Stack()

void Stack ( const StackQueueDescriptor data,
std::vector< std::unique_ptr< Decoder< float >>> &  inputs,
Encoder< float > &  output,
const TensorInfo inputInfo,
const TensorInfo outputInfo 
)

Definition at line 12 of file Stack.cpp.

References TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), StackDescriptor::m_Axis, QueueDescriptor::m_Inputs, StackDescriptor::m_NumInputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

Referenced by TEST_SUITE().

17 {
18  unsigned int outputNumDims = outputInfo.GetNumDimensions();
19  unsigned int inputNumDims = inputInfo.GetNumDimensions();
20 
21  const armnn::TensorShape& outputDims = outputInfo.GetShape();
22  const armnn::TensorShape& inputDims = inputInfo.GetShape();
23 
24  unsigned int axis = data.m_Parameters.m_Axis;
25 
26  // Can perform a simple concatenation when axis == 0
27  if (!axis)
28  {
29  unsigned int numInputs = data.m_Parameters.m_NumInputs;
30  unsigned int inputLength = inputInfo.GetNumElements();
31 
32  for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
33  {
34  for (unsigned int elmt=0; elmt<inputLength; ++elmt)
35  {
36  (*inputs[inputIdx])[elmt];
37  output[(inputIdx * inputLength) + elmt];
38  output.Set(inputs[inputIdx]->Get());
39  }
40  }
41  return;
42  }
43 
44  const unsigned int iNumTensors = static_cast<unsigned int>(data.m_Inputs.size());
45  const unsigned int iBatchSize = inputDims[0];
46  const unsigned int iChannels = (inputNumDims > 1) ? inputDims[1] : 1;
47  const unsigned int iHeight = (inputNumDims > 2) ? inputDims[2] : 1;
48  const unsigned int iWidth = (inputNumDims > 3) ? inputDims[3] : 1;
49 
50  const unsigned int oBatchSize = outputDims[1];
51  const unsigned int oChannels = (outputNumDims > 2) ? outputDims[2] : 1;
52  const unsigned int oHeight = (outputNumDims > 3) ? outputDims[3] : 1;
53  const unsigned int oWidth = (outputNumDims > 4) ? outputDims[4] : 1;
54 
55  // Array to store the input coordinates
56  // iCoordinates[0] = i, iCoordinates[1] = bi, iCoordinates[2] = ci
57  // iCoordinates[3] = hi, iCoordinates[4] = wi, iCoordinates[5] = 0
58  // iCoordinates[5] will be always zero and used for not incrementing
59  // the output when the input has less than 4 dimensions
60  std::array<unsigned int, 6> iCoordinates{ 0 };
61 
62  // Array of pointers used to map the output coordinates to the input ones, in accordance with the axis
63  // This array is initialized with &iCoordinates[5] since this will be always zero
64  std::array<unsigned int *, 5> oCoordinates = { &iCoordinates[5],
65  &iCoordinates[5],
66  &iCoordinates[5],
67  &iCoordinates[5],
68  &iCoordinates[5] };
69 
70  // Set the axis coordinate
71  oCoordinates[axis] = &iCoordinates[0];
72 
73  // Map the output coordinates, accounting for the axis
74  unsigned int dim_shift = 0;
75  for(unsigned int dim = 0; dim < inputNumDims; ++dim)
76  {
77  if(dim == axis)
78  {
79  dim_shift++;
80  }
81  oCoordinates[dim + dim_shift] = &iCoordinates[dim + 1];
82  }
83 
84  // Alias for the input coordinates
85  unsigned int &i = iCoordinates[0];
86  unsigned int &bi = iCoordinates[1];
87  unsigned int &ci = iCoordinates[2];
88  unsigned int &hi = iCoordinates[3];
89  unsigned int &wi = iCoordinates[4];
90 
91  // Alias for the output coordinates
92  unsigned int &o = *(oCoordinates[0]);
93  unsigned int &bo = *(oCoordinates[1]);
94  unsigned int &co = *(oCoordinates[2]);
95  unsigned int &ho = *(oCoordinates[3]);
96  unsigned int &wo = *(oCoordinates[4]);
97 
98  // Stack tensors
99  for(; i < iNumTensors; ++(i))
100  {
101  for(bi = 0; bi < iBatchSize; ++(bi))
102  {
103  for(ci = 0; ci < iChannels; ++(ci))
104  {
105  for(hi = 0; hi < iHeight; ++(hi))
106  {
107  for(wi = 0; wi < iWidth; ++(wi))
108  {
109  output[o * oWidth * oHeight * oChannels * oBatchSize +
110  bo * oWidth * oHeight * oChannels +
111  co * oWidth * oHeight +
112  ho * oWidth +
113  wo];
114 
115  output.Set(inputs[i]->Get());
116 
117  ++(*(inputs[i]));
118  }
119  }
120  }
121  }
122  }
123 }
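
A minimal descriptor sketch: stacking three [2, 2] inputs along a new leading axis gives a [3, 2, 2] output, and with m_Axis == 0 the code above falls back to the simple concatenation path.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>

armnn::StackDescriptor MakeStackDescriptor()
{
    armnn::StackDescriptor desc;
    desc.m_Axis       = 0;
    desc.m_NumInputs  = 3;
    desc.m_InputShape = armnn::TensorShape({2, 2});   // shape of each individual input
    return desc;
}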

◆ StrEqual()

constexpr bool armnn::StrEqual ( const char *  strA,
const char(&)  strB[N] 
)

Definition at line 170 of file TypesUtils.hpp.

Referenced by ParseComputeDevice().

171 {
172  bool isEqual = true;
173  for (unsigned i = 0; isEqual && (i < N); ++i)
174  {
175  isEqual = (strA[i] == strB[i]);
176  }
177  return isEqual;
178 }
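
Because the function is constexpr, it can be evaluated at compile time. Note that N, the literal's length including the terminating '\0', bounds the loop, so the strings compare equal only when strA terminates at the same position as strB. A small sketch:

#include <armnn/TypesUtils.hpp>

static_assert(armnn::StrEqual("CpuRef", "CpuRef"), "identical strings compare equal");
static_assert(!armnn::StrEqual("CpuAcc", "GpuAcc"), "different strings compare unequal");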

◆ StridedSlice()

void StridedSlice ( const TensorInfo inputInfo,
const StridedSliceDescriptor params,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 90 of file StridedSlice.cpp.

References TensorInfo::GetShape(), and numeric_cast().

Referenced by TEST_SUITE().

95 {
96  const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
97  unsigned char* output = reinterpret_cast<unsigned char*>(outputData);
98 
99  const TensorShape inputShape = ExtendShape(inputInfo.GetShape(), 4);
100 
101  StridedSliceDescriptor paddedParams = params;
102 
103  // Pad parameters to 4 dimensions
104  PadParams(paddedParams, 4);
105 
106  const int start0 = paddedParams.GetStartForAxis(inputShape, 0);
107  const int stop0 = paddedParams.GetStopForAxis (inputShape, 0, start0);
108 
109  const int start1 = paddedParams.GetStartForAxis(inputShape, 1);
110  const int stop1 = paddedParams.GetStopForAxis (inputShape, 1, start1);
111 
112  const int start2 = paddedParams.GetStartForAxis(inputShape, 2);
113  const int stop2 = paddedParams.GetStopForAxis (inputShape, 2, start2);
114 
115  const int start3 = paddedParams.GetStartForAxis(inputShape, 3);
116  const int stop3 = paddedParams.GetStopForAxis (inputShape, 3, start3);
117 
118  const int step = armnn::numeric_cast<int>(dataTypeSize);
119 
120  for (int in0 = start0;
121  !LoopCondition(in0, stop0, paddedParams.m_Stride[0]);
122  in0 += paddedParams.m_Stride[0])
123  {
124  for (int in1 = start1;
125  !LoopCondition(in1, stop1, paddedParams.m_Stride[1]);
126  in1 += paddedParams.m_Stride[1])
127  {
128  for (int in2 = start2;
129  !LoopCondition(in2, stop2, paddedParams.m_Stride[2]);
130  in2 += paddedParams.m_Stride[2])
131  {
132  for (int in3 = start3;
133  !LoopCondition(in3, stop3, paddedParams.m_Stride[3]);
134  in3 += paddedParams.m_Stride[3])
135  {
136  int dim1 = armnn::numeric_cast<int>(inputShape[1]);
137  int dim2 = armnn::numeric_cast<int>(inputShape[2]);
138  int dim3 = armnn::numeric_cast<int>(inputShape[3]);
139 
140  int inputOffset = (((in0 * dim1 + in1) * dim2 + in2) * dim3 + in3) * step;
141  ::memcpy(output, input + inputOffset, dataTypeSize);
142  output += step;
143  }
144  }
145  }
146  }
147 }
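
A minimal descriptor sketch (mask fields left at their defaults): selecting every second row of a [4, 4] tensor, i.e. rows 0 and 2 with all columns.

#include <armnn/Descriptors.hpp>

armnn::StridedSliceDescriptor MakeStridedSliceDescriptor()
{
    armnn::StridedSliceDescriptor desc;
    desc.m_Begin  = {0, 0};
    desc.m_End    = {4, 4};
    desc.m_Stride = {2, 1};
    return desc;
}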

◆ StringToLogLevel()

LogSeverity armnn::StringToLogLevel ( std::string  level)
inline

Definition at line 36 of file Logging.hpp.

References Debug, Error, Fatal, Info, Trace, and Warning.

Referenced by DelegateOptions::SetLoggingSeverity().

37 {
38  // Transfer to lower case
39  std::transform(level.begin(), level.end(), level.begin(),
40  [](unsigned char c){ return std::tolower(c); }
41  );
42 
43  if (level == "trace")
44  {
45  return LogSeverity::Trace;
46  }
47  else if (level == "debug")
48  {
49  return LogSeverity::Debug;
50  }
51  else if (level == "info")
52  {
53  return LogSeverity::Info;
54  }
55  else if (level == "warning")
56  {
57  return LogSeverity::Warning;
58  }
59  else if (level == "error")
60  {
61  return LogSeverity::Error;
62  }
63  else if (level == "fatal")
64  {
65  return LogSeverity::Fatal;
66  }
67  else
68  {
69  throw armnn::Exception("Unknown severity level for logging: '" + level +
70  "'. Valid options: trace, debug, info, warning, error, fatal");
71  }
72 }
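
A minimal usage sketch (include paths assumed), pairing the parser with the public logging setup:

#include <armnn/Logging.hpp>   // path assumed for StringToLogLevel
#include <armnn/Utils.hpp>     // ConfigureLogging, LogSeverity
#include <string>

void ConfigureFromString(const std::string& level)
{
    // Throws armnn::Exception for anything other than
    // trace/debug/info/warning/error/fatal (case-insensitive).
    armnn::LogSeverity severity = armnn::StringToLogLevel(level);
    armnn::ConfigureLogging(/*printToStandardOutput=*/true,
                            /*printToDebugOutput=*/false,
                            severity);
}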

◆ swap() [1/2]

void armnn::swap ( OriginsDescriptor first,
OriginsDescriptor second 
)

Definition at line 350 of file Descriptors.cpp.

References ViewsDescriptor::swap, and swap().

Referenced by FullyConnectedFloat32Test(), FullyConnectedLargeTestCommon(), BackendId::operator=(), SquashEqualSiblingsImpl< Comparable >::Run(), BackendRegistry::Swap(), and TEST_SUITE().

351 {
352  using std::swap;
353  swap(first.m_NumViews, second.m_NumViews);
354  swap(first.m_NumDimensions, second.m_NumDimensions);
355  swap(first.m_ViewOrigins, second.m_ViewOrigins);
356  swap(first.m_ConcatAxis, second.m_ConcatAxis);
357 }

◆ swap() [2/2]

void armnn::swap ( ViewsDescriptor first,
ViewsDescriptor second 
)

Definition at line 359 of file Descriptors.cpp.

References ViewsDescriptor::swap.

Referenced by swap().

360 {
361  using std::swap;
362  swap(first.m_Origins, second.m_Origins);
363  swap(first.m_ViewSizes, second.m_ViewSizes);
364 }

◆ TEST_SUITE() [1/3]

armnn::TEST_SUITE ( "TestInputOutputLayerVisitor"  )

Definition at line 13 of file TestInputOutputLayerVisitor.cpp.

References NetworkImpl::AddInputLayer(), NetworkImpl::AddOutputLayer(), and IConnectableLayer::ExecuteStrategy().

14 {
15 TEST_CASE("CheckInputLayerVisitorBindingIdAndName")
16 {
17  const char* layerName = "InputLayer";
18  TestInputLayerVisitor visitor(1, layerName);
19  NetworkImpl net;
20 
21  IConnectableLayer *const layer = net.AddInputLayer(1, layerName);
22  layer->ExecuteStrategy(visitor);
23 }
24 
25 TEST_CASE("CheckInputLayerVisitorBindingIdAndNameNull")
26 {
27  TestInputLayerVisitor visitor(1);
28  NetworkImpl net;
29 
30  IConnectableLayer *const layer = net.AddInputLayer(1);
31  layer->ExecuteStrategy(visitor);
32 }
33 
34 TEST_CASE("CheckOutputLayerVisitorBindingIdAndName")
35 {
36  const char* layerName = "OutputLayer";
37  TestOutputLayerVisitor visitor(1, layerName);
38  NetworkImpl net;
39 
40  IConnectableLayer *const layer = net.AddOutputLayer(1, layerName);
41  layer->ExecuteStrategy(visitor);
42 }
43 
44 TEST_CASE("CheckOutputLayerVisitorBindingIdAndNameNull")
45 {
46  TestOutputLayerVisitor visitor(1);
47  NetworkImpl net;
48 
49  IConnectableLayer *const layer = net.AddOutputLayer(1);
50  layer->ExecuteStrategy(visitor);
51 }
52 
53 }

◆ TEST_SUITE() [2/3]

armnn::TEST_SUITE ( "MemoryManagerTests"  )

Unit test Storing, Allocating and Deallocating with a custom allocator.

Definition at line 53 of file MemoryManagerTests.cpp.

References MemoryManager::Allocate(), MemoryManager::Deallocate(), and MemoryManager::StoreMemToAllocate().

54 {
55 /// Unit test Storing, Allocating and Deallocating with a custom allocator.
56 TEST_CASE("MemoryManagerTest")
57 {
58  using namespace armnn;
59 
60  // Create mock up bufferStorageVector with 2 BufferStorage with the same TensorMemory
61  size_t numTensors = 5;
62  std::vector<std::shared_ptr<TensorMemory>> tensorMemoryPointerVector(numTensors);
63  std::vector<std::shared_ptr<TensorMemory>> tensorMemoryVector;
64  tensorMemoryVector.reserve(numTensors);
65 
66  std::vector<size_t> offsets(numTensors);
67  std::iota(std::begin(offsets), std::end(offsets), 0);
68 
69  for (uint32_t idx = 0; idx < tensorMemoryPointerVector.size(); ++idx)
70  {
71  tensorMemoryVector.emplace_back(std::make_shared<TensorMemory>(TensorMemory{offsets[idx], 0, nullptr}));
72 
73  tensorMemoryPointerVector[idx] = tensorMemoryVector[idx];
74  }
75 
76  std::vector<BufferStorage> bufferStorageVector;
77  bufferStorageVector.emplace_back(BufferStorage{tensorMemoryPointerVector, numTensors});
78  bufferStorageVector.emplace_back(BufferStorage{tensorMemoryPointerVector, numTensors});
79 
80  // Create an instance of the SampleCustomAllocator
81  std::shared_ptr<SampleCustomAllocator> customAllocator =
82  std::make_unique<SampleCustomAllocator>(SampleCustomAllocator());
83 
84  customAllocator->m_Values = {10, 11, 12, 13, 14};
85  // Check that the test was set up correctly
86  CHECK(customAllocator->m_Values.size() == numTensors);
87 
88  size_t bufferVecSize = bufferStorageVector.size();
89  // Utilise 3 functions in the MemoryManager. Check the counters and the pointer to the values are correct.
90  MemoryManager memoryManager;
91  memoryManager.StoreMemToAllocate(bufferStorageVector, customAllocator);
92 
93  memoryManager.Allocate();
94  CHECK(customAllocator->m_CounterAllocate == bufferVecSize);
95 
96  uint32_t idx = 0;
97  for (auto tensorMemory : tensorMemoryVector)
98  {
99  auto value = reinterpret_cast<uint8_t *>(tensorMemory->m_Data);
100  CHECK(customAllocator->m_Values[idx] == *value);
101  idx += 1;
102  }
103 
104  memoryManager.Deallocate();
105  CHECK(customAllocator->m_CounterFree == bufferStorageVector.size());
106 }
107 }

◆ TEST_SUITE() [3/3]

armnn::TEST_SUITE ( "TestConstTensorLayerVisitor"  )

Definition at line 110 of file ConstTensorLayerVisitor.cpp.

References NetworkImpl::AddBatchNormalizationLayer(), NetworkImpl::AddConstantLayer(), NetworkImpl::AddConvolution2dLayer(), NetworkImpl::AddDepthwiseConvolution2dLayer(), NetworkImpl::AddFullyConnectedLayer(), NetworkImpl::AddLstmLayer(), NetworkImpl::AddQLstmLayer(), NetworkImpl::AddQuantizedLstmLayer(), IOutputSlot::Connect(), IConnectableLayer::ExecuteStrategy(), Float32, IConnectableLayer::GetInputSlot(), IConnectableLayer::GetOutputSlot(), LstmDescriptor::m_ActivationFunc, FullyConnectedDescriptor::m_BiasEnabled, Convolution2dDescriptor::m_BiasEnabled, DepthwiseConvolution2dDescriptor::m_BiasEnabled, QuantizedLstmInputParams::m_CellBias, LstmInputParams::m_CellBias, QLstmDescriptor::m_CellClip, LstmInputParams::m_CellLayerNormWeights, LstmInputParams::m_CellToForgetWeights, LstmInputParams::m_CellToInputWeights, LstmInputParams::m_CellToOutputWeights, LstmDescriptor::m_CifgEnabled, QLstmDescriptor::m_CifgEnabled, LstmDescriptor::m_ClippingThresCell, LstmDescriptor::m_ClippingThresProj, FullyConnectedDescriptor::m_ConstantWeights, Convolution2dDescriptor::m_DataLayout, DepthwiseConvolution2dDescriptor::m_DataLayout, BatchNormalizationDescriptor::m_DataLayout, BatchNormalizationDescriptor::m_Eps, QuantizedLstmInputParams::m_ForgetGateBias, LstmInputParams::m_ForgetGateBias, LstmInputParams::m_ForgetLayerNormWeights, QuantizedLstmInputParams::m_InputGateBias, LstmInputParams::m_InputGateBias, LstmInputParams::m_InputLayerNormWeights, QuantizedLstmInputParams::m_InputToCellWeights, LstmInputParams::m_InputToCellWeights, QuantizedLstmInputParams::m_InputToForgetWeights, LstmInputParams::m_InputToForgetWeights, QuantizedLstmInputParams::m_InputToInputWeights, LstmInputParams::m_InputToInputWeights, QuantizedLstmInputParams::m_InputToOutputWeights, LstmInputParams::m_InputToOutputWeights, QLstmDescriptor::m_LayerNormEnabled, QuantizedLstmInputParams::m_OutputGateBias, LstmInputParams::m_OutputGateBias, LstmInputParams::m_OutputLayerNormWeights, Convolution2dDescriptor::m_PadBottom, DepthwiseConvolution2dDescriptor::m_PadBottom, Convolution2dDescriptor::m_PadLeft, DepthwiseConvolution2dDescriptor::m_PadLeft, Convolution2dDescriptor::m_PadRight, DepthwiseConvolution2dDescriptor::m_PadRight, Convolution2dDescriptor::m_PadTop, DepthwiseConvolution2dDescriptor::m_PadTop, LstmDescriptor::m_PeepholeEnabled, QLstmDescriptor::m_PeepholeEnabled, LstmInputParams::m_ProjectionBias, QLstmDescriptor::m_ProjectionClip, LstmDescriptor::m_ProjectionEnabled, QLstmDescriptor::m_ProjectionEnabled, LstmInputParams::m_ProjectionWeights, QuantizedLstmInputParams::m_RecurrentToCellWeights, LstmInputParams::m_RecurrentToCellWeights, QuantizedLstmInputParams::m_RecurrentToForgetWeights, LstmInputParams::m_RecurrentToForgetWeights, QuantizedLstmInputParams::m_RecurrentToInputWeights, LstmInputParams::m_RecurrentToInputWeights, QuantizedLstmInputParams::m_RecurrentToOutputWeights, LstmInputParams::m_RecurrentToOutputWeights, Convolution2dDescriptor::m_StrideX, DepthwiseConvolution2dDescriptor::m_StrideX, Convolution2dDescriptor::m_StrideY, DepthwiseConvolution2dDescriptor::m_StrideY, FullyConnectedDescriptor::m_TransposeWeightMatrix, NHWC, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

Referenced by TEST_SUITE().

111 {
112 TEST_CASE("CheckConvolution2dLayer")
113 {
114  Convolution2dDescriptor descriptor;
115  descriptor.m_PadLeft = 2;
116  descriptor.m_PadRight = 3;
117  descriptor.m_PadBottom = 1;
118  descriptor.m_PadTop = 5;
119  descriptor.m_StrideX = 2;
120  descriptor.m_StrideY = 3;
121  descriptor.m_DataLayout = DataLayout::NHWC;
122  descriptor.m_BiasEnabled = false;
123 
124  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
125  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
126  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
127 
128  TestConstantLayerVisitor weightsVisitor(weights);
129  TestConvolution2dLayerVisitor visitor(descriptor);
130 
131  NetworkImpl net;
132 
133  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
134  IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor);
135  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
136 
137  weightsLayer->ExecuteStrategy(weightsVisitor);
138  layer->ExecuteStrategy(visitor);
139 }
140 
141 TEST_CASE("CheckNamedConvolution2dLayer")
142 {
143  const char* layerName = "Convolution2dLayer";
144  Convolution2dDescriptor descriptor;
145  descriptor.m_PadLeft = 2;
146  descriptor.m_PadRight = 3;
147  descriptor.m_PadBottom = 1;
148  descriptor.m_PadTop = 5;
149  descriptor.m_StrideX = 2;
150  descriptor.m_StrideY = 3;
151  descriptor.m_DataLayout = DataLayout::NHWC;
152 
153  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
154  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
155  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
156 
157  TestConstantLayerVisitor weightsVisitor(weights);
158  TestConvolution2dLayerVisitor visitor(descriptor, layerName);
159 
160  NetworkImpl net;
161 
162  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
163  IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, layerName);
164 
165  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
166 
167  weightsLayer->ExecuteStrategy(weightsVisitor);
168  layer->ExecuteStrategy(visitor);
169 }
170 
171 TEST_CASE("CheckConvolution2dLayerWithBiases")
172 {
173  Convolution2dDescriptor descriptor;
174  descriptor.m_PadLeft = 2;
175  descriptor.m_PadRight = 3;
176  descriptor.m_PadBottom = 1;
177  descriptor.m_PadTop = 5;
178  descriptor.m_StrideX = 2;
179  descriptor.m_StrideY = 3;
180  descriptor.m_DataLayout = DataLayout::NHWC;
181  descriptor.m_BiasEnabled = true;
182 
183  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
184  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
185  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
186 
187  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
188  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
189  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
190 
191  TestConstantLayerVisitor weightsVisitor(weights);
192  TestConstantLayerVisitor biasVisitor(biases);
193  TestConvolution2dLayerVisitor visitor(descriptor);
194 
195  NetworkImpl net;
196  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
197  IConnectableLayer* const biasLayer = net.AddConstantLayer(biases);
198  IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor);
199 
200  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
201  biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
202 
203  biasLayer->ExecuteStrategy(biasVisitor);
204  weightsLayer->ExecuteStrategy(weightsVisitor);
205  layer->ExecuteStrategy(visitor);
206 }
207 
208 TEST_CASE("CheckNamedConvolution2dLayerWithBiases")
209 {
210  const char* layerName = "Convolution2dLayer";
211  Convolution2dDescriptor descriptor;
212  descriptor.m_PadLeft = 2;
213  descriptor.m_PadRight = 3;
214  descriptor.m_PadBottom = 1;
215  descriptor.m_PadTop = 5;
216  descriptor.m_StrideX = 2;
217  descriptor.m_StrideY = 3;
218  descriptor.m_DataLayout = DataLayout::NHWC;
219  descriptor.m_BiasEnabled = true;
220 
221  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
222  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
223  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
224 
225  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
226  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
227  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
228 
229  TestConstantLayerVisitor weightsVisitor(weights);
230  TestConstantLayerVisitor biasVisitor(biases);
231  TestConvolution2dLayerVisitor visitor(descriptor, layerName);
232 
233  NetworkImpl net;
234  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
235  IConnectableLayer* const biasLayer = net.AddConstantLayer(biases);
236  IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, layerName);
237 
238  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
239  biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
240 
241  biasLayer->ExecuteStrategy(biasVisitor);
242  weightsLayer->ExecuteStrategy(weightsVisitor);
243  layer->ExecuteStrategy(visitor);
244 }
245 
246 TEST_CASE("CheckDepthwiseConvolution2dLayer")
247 {
248  DepthwiseConvolution2dDescriptor descriptor;
249  descriptor.m_PadLeft = 2;
250  descriptor.m_PadRight = 3;
251  descriptor.m_PadBottom = 1;
252  descriptor.m_PadTop = 5;
253  descriptor.m_StrideX = 2;
254  descriptor.m_StrideY = 3;
255  descriptor.m_DataLayout = DataLayout::NHWC;
256 
257  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
258  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
259  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
260 
261  NetworkImpl net;
262 
263  TestConstantLayerVisitor weightsVisitor(weights);
264  TestDepthwiseConvolution2dLayerVisitor visitor(descriptor);
265 
266  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
267  IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor);
268  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
269 
270  weightsLayer->ExecuteStrategy(weightsVisitor);
271  layer->ExecuteStrategy(visitor);
272 }
273 
274 TEST_CASE("CheckNamedDepthwiseConvolution2dLayer")
275 {
276  const char* layerName = "DepthwiseConvolution2dLayer";
277  DepthwiseConvolution2dDescriptor descriptor;
278  descriptor.m_PadLeft = 2;
279  descriptor.m_PadRight = 3;
280  descriptor.m_PadBottom = 1;
281  descriptor.m_PadTop = 5;
282  descriptor.m_StrideX = 2;
283  descriptor.m_StrideY = 3;
284  descriptor.m_DataLayout = DataLayout::NHWC;
285 
286  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
287  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
288  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
289 
290  NetworkImpl net;
291 
292  TestConstantLayerVisitor weightsVisitor(weights);
293  TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, layerName);
294 
295  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
296  IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor, layerName);
297  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
298 
299  weightsLayer->ExecuteStrategy(weightsVisitor);
300  layer->ExecuteStrategy(visitor);
301 }
302 
303 TEST_CASE("CheckDepthwiseConvolution2dLayerWithBiases")
304 {
305  DepthwiseConvolution2dDescriptor descriptor;
306  descriptor.m_PadLeft = 2;
307  descriptor.m_PadRight = 3;
308  descriptor.m_PadBottom = 1;
309  descriptor.m_PadTop = 5;
310  descriptor.m_StrideX = 2;
311  descriptor.m_StrideY = 3;
312  descriptor.m_DataLayout = DataLayout::NHWC;
313  descriptor.m_BiasEnabled = true;
314 
315  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
316  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
317  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
318 
319  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
320  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
321  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
322 
323  TestConstantLayerVisitor weightsVisitor(weights);
324  TestConstantLayerVisitor biasesVisitor(biases);
325  TestDepthwiseConvolution2dLayerVisitor visitor(descriptor);
326 
327  NetworkImpl net;
328 
329  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
330  IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
331  IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor);
332  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
333  biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
334 
335  weightsLayer->ExecuteStrategy(weightsVisitor);
336  biasesLayer->ExecuteStrategy(biasesVisitor);
337  layer->ExecuteStrategy(visitor);
338 }
339 
340 TEST_CASE("CheckNamedDepthwiseConvolution2dLayerWithBiases")
341 {
342  const char* layerName = "DepthwiseConvolution2dLayer";
343  DepthwiseConvolution2dDescriptor descriptor;
344  descriptor.m_PadLeft = 2;
345  descriptor.m_PadRight = 3;
346  descriptor.m_PadBottom = 1;
347  descriptor.m_PadTop = 5;
348  descriptor.m_StrideX = 2;
349  descriptor.m_StrideY = 3;
350  descriptor.m_DataLayout = DataLayout::NHWC;
351  descriptor.m_BiasEnabled = true;
352 
353  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
354  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
355  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
356 
357  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
358  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
359  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
360 
361  TestConstantLayerVisitor weightsVisitor(weights);
362  TestConstantLayerVisitor biasesVisitor(biases);
363  TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, layerName);
364 
365  NetworkImpl net;
366 
367  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
368  IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
369  IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor, layerName);
370  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
371  biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
372 
373  weightsLayer->ExecuteStrategy(weightsVisitor);
374  biasesLayer->ExecuteStrategy(biasesVisitor);
375  layer->ExecuteStrategy(visitor);
376 }
377 
378 TEST_CASE("CheckFullyConnectedLayer")
379 {
380  FullyConnectedDescriptor descriptor;
381  descriptor.m_TransposeWeightMatrix = true;
382  descriptor.m_ConstantWeights = true;
383  descriptor.m_BiasEnabled = false;
384 
385  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
386  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
387  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
388 
389  TestConstantLayerVisitor weightsVisitor(weights);
390  TestFullyConnectedLayerVistor visitor(descriptor);
391 
392  NetworkImpl net;
393 
394  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
395  IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor);
396  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
397 
398  weightsLayer->ExecuteStrategy(weightsVisitor);
399  layer->ExecuteStrategy(visitor);
400 }
401 
402 TEST_CASE("CheckNamedFullyConnectedLayer")
403 {
404  const char* layerName = "FullyConnectedLayer";
405  FullyConnectedDescriptor descriptor;
406  descriptor.m_TransposeWeightMatrix = true;
407  descriptor.m_ConstantWeights = true;
408  descriptor.m_BiasEnabled = false;
409 
410  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
411  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
412  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
413 
414  TestConstantLayerVisitor weightsVisitor(weights);
415  TestFullyConnectedLayerVistor visitor(descriptor, layerName);
416 
417  NetworkImpl net;
418 
419  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
420  IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, layerName);
421  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
422 
423  weightsLayer->ExecuteStrategy(weightsVisitor);
424  layer->ExecuteStrategy(visitor);
425 }
426 
427 TEST_CASE("CheckFullyConnectedLayerWithBiases")
428 {
429  FullyConnectedDescriptor descriptor;
430  descriptor.m_TransposeWeightMatrix = true;
431  descriptor.m_ConstantWeights = true;
432  descriptor.m_BiasEnabled = true;
433 
434  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
435  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
436  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
437 
438  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
439  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
440  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
441 
442  TestConstantLayerVisitor weightsVisitor(weights);
443  TestConstantLayerVisitor biasesVisitor(biases);
444  TestFullyConnectedLayerVistor visitor(descriptor);
445 
446  NetworkImpl net;
447 
448  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
449  IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
450  IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor);
451  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
452  biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
453 
454  weightsLayer->ExecuteStrategy(weightsVisitor);
455  biasesLayer->ExecuteStrategy(biasesVisitor);
456  layer->ExecuteStrategy(visitor);
457 }
458 
459 TEST_CASE("CheckNamedFullyConnectedLayerWithBiases")
460 {
461  const char* layerName = "FullyConnectedLayer";
462  FullyConnectedDescriptor descriptor;
463  descriptor.m_TransposeWeightMatrix = true;
464  descriptor.m_ConstantWeights = true;
465  descriptor.m_BiasEnabled = true;
466 
467  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
468  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
469  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
470 
471  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
472  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
473  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
474 
475  TestConstantLayerVisitor weightsVisitor(weights);
476  TestConstantLayerVisitor biasesVisitor(biases);
477  TestFullyConnectedLayerVistor visitor(descriptor, layerName);
478 
479  NetworkImpl net;
480 
481  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
482  IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
483  IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, layerName);
484  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
485  biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
486 
487  weightsLayer->ExecuteStrategy(weightsVisitor);
488  biasesLayer->ExecuteStrategy(biasesVisitor);
489  layer->ExecuteStrategy(visitor);
490 }
491 
492 TEST_CASE("CheckBatchNormalizationLayer")
493 {
494  BatchNormalizationDescriptor descriptor;
495  descriptor.m_Eps = 0.0002f;
496  descriptor.m_DataLayout = DataLayout::NHWC;
497 
498  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
499  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
500  ConstTensor mean(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
501 
502  std::vector<float> varianceData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
503  std::vector<unsigned int> varianceDimensions = {1, 1, 3, 3};
504  ConstTensor variance(TensorInfo(4, varianceDimensions.data(), DataType::Float32, 0.0f, 0, true), varianceData);
505 
506  std::vector<float> betaData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
507  std::vector<unsigned int> betaDimensions = {1, 1, 3, 3};
508  ConstTensor beta(TensorInfo(4, betaDimensions.data(), DataType::Float32, 0.0f, 0, true), betaData);
509 
510  std::vector<float> gammaData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
511  std::vector<unsigned int> gammaDimensions = {1, 1, 3, 3};
512  ConstTensor gamma(TensorInfo(4, gammaDimensions.data(), DataType::Float32, 0.0f, 0, true), gammaData);
513 
514  TestBatchNormalizationLayerVisitor visitor(descriptor, mean, variance, beta, gamma);
515 
516  NetworkImpl net;
517 
518  IConnectableLayer* const layer = net.AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma);
519  layer->ExecuteStrategy(visitor);
520 }
521 
522 TEST_CASE("CheckNamedBatchNormalizationLayer")
523 {
524  const char* layerName = "BatchNormalizationLayer";
525  BatchNormalizationDescriptor descriptor;
526  descriptor.m_Eps = 0.0002f;
527  descriptor.m_DataLayout = DataLayout::NHWC;
528 
529  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
530  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
531  ConstTensor mean(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
532 
533  std::vector<float> varianceData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
534  std::vector<unsigned int> varianceDimensions = {1, 1, 3, 3};
535  ConstTensor variance(TensorInfo(4, varianceDimensions.data(), DataType::Float32, 0.0f, 0, true), varianceData);
536 
537  std::vector<float> betaData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
538  std::vector<unsigned int> betaDimensions = {1, 1, 3, 3};
539  ConstTensor beta(TensorInfo(4, betaDimensions.data(), DataType::Float32, 0.0f, 0, true), betaData);
540 
541  std::vector<float> gammaData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
542  std::vector<unsigned int> gammaDimensions = {1, 1, 3, 3};
543  ConstTensor gamma(TensorInfo(4, gammaDimensions.data(), DataType::Float32, 0.0f, 0, true), gammaData);
544 
545  TestBatchNormalizationLayerVisitor visitor(descriptor, mean, variance, beta, gamma, layerName);
546 
547  NetworkImpl net;
548 
549  IConnectableLayer* const layer = net.AddBatchNormalizationLayer(
550  descriptor, mean, variance, beta, gamma, layerName);
551  layer->ExecuteStrategy(visitor);
552 }
553 
554 TEST_CASE("CheckConstLayer")
555 {
556  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
557  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
558  ConstTensor input(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
559 
560  TestConstantLayerVisitor visitor(input);
561 
562  NetworkImpl net;
563 
564  IConnectableLayer* const layer = net.AddConstantLayer(input);
565  layer->ExecuteStrategy(visitor);
566 }
567 
568 TEST_CASE("CheckNamedConstLayer")
569 {
570  const char* layerName = "ConstantLayer";
571  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
572  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
573  ConstTensor input(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
574 
575  TestConstantLayerVisitor visitor(input, layerName);
576 
577  NetworkImpl net;
578 
579  IConnectableLayer* const layer = net.AddConstantLayer(input, layerName);
580  layer->ExecuteStrategy(visitor);
581 }
582 
583 TEST_CASE("CheckLstmLayerBasic")
584 {
585  LstmDescriptor descriptor;
586  descriptor.m_ActivationFunc = 3;
587  descriptor.m_ClippingThresProj = 0.5f;
588  descriptor.m_ClippingThresCell = 0.3f;
589  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
590 
591  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
592  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
593  ConstTensor inputToForgetWeights(
594  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
595  inputToForgetWeightsData);
596 
597  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
598  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
599  ConstTensor inputToCellWeights(
600  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
601  inputToCellWeightsData);
602 
603  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
604  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
605  ConstTensor inputToOutputWeights(
606  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
607  inputToOutputWeightsData);
608 
609  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
610  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
611  ConstTensor recurrentToForgetWeights(
612  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
613  recurrentToForgetWeightsData);
614 
615  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
616  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
617  ConstTensor recurrentToCellWeights(
618  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
619  recurrentToCellWeightsData);
620 
621  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
622  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
623  ConstTensor recurrentToOutputWeights(
624  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
625  recurrentToOutputWeightsData);
626 
627  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
628  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
629  ConstTensor forgetGateBias(
630  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
631  forgetGateBiasData);
632 
633  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
634  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
635  ConstTensor cellBias(
636  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
637  cellBiasData);
638 
639  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
640  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
641  ConstTensor outputGateBias(
642  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
643  outputGateBiasData);
644 
645  LstmInputParams params;
646  params.m_InputToForgetWeights = &inputToForgetWeights;
647  params.m_InputToCellWeights = &inputToCellWeights;
648  params.m_InputToOutputWeights = &inputToOutputWeights;
649  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
650  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
651  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
652  params.m_ForgetGateBias = &forgetGateBias;
653  params.m_CellBias = &cellBias;
654  params.m_OutputGateBias = &outputGateBias;
655 
656  TestLstmLayerVisitor visitor(descriptor, params);
657 
658  NetworkImpl net;
659 
660  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
661  layer->ExecuteStrategy(visitor);
662 }
663 
664 TEST_CASE("CheckNamedLstmLayerBasic")
665 {
666  const char* layerName = "LstmLayer";
667  LstmDescriptor descriptor;
668  descriptor.m_ActivationFunc = 3;
669  descriptor.m_ClippingThresProj = 0.5f;
670  descriptor.m_ClippingThresCell = 0.3f;
671  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
672 
673  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
674  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
675  ConstTensor inputToForgetWeights(
676  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
677  inputToForgetWeightsData);
678 
679  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
680  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
681  ConstTensor inputToCellWeights(
682  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
683  inputToCellWeightsData);
684 
685  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
686  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
687  ConstTensor inputToOutputWeights(
688  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
689  inputToOutputWeightsData);
690 
691  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
692  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
693  ConstTensor recurrentToForgetWeights(
694  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
695  recurrentToForgetWeightsData);
696 
697  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
698  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
699  ConstTensor recurrentToCellWeights(
700  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
701  recurrentToCellWeightsData);
702 
703  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
704  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
705  ConstTensor recurrentToOutputWeights(
706  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
707  recurrentToOutputWeightsData);
708 
709  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
710  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
711  ConstTensor forgetGateBias(
712  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
713  forgetGateBiasData);
714 
715  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
716  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
717  ConstTensor cellBias(
718  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
719  cellBiasData);
720 
721  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
722  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
723  ConstTensor outputGateBias(
724  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
725  outputGateBiasData);
726 
727  LstmInputParams params;
728  params.m_InputToForgetWeights = &inputToForgetWeights;
729  params.m_InputToCellWeights = &inputToCellWeights;
730  params.m_InputToOutputWeights = &inputToOutputWeights;
731  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
732  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
733  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
734  params.m_ForgetGateBias = &forgetGateBias;
735  params.m_CellBias = &cellBias;
736  params.m_OutputGateBias = &outputGateBias;
737 
738  TestLstmLayerVisitor visitor(descriptor, params, layerName);
739 
740  NetworkImpl net;
741 
742  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params, layerName);
743  layer->ExecuteStrategy(visitor);
744 }
745 
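// With CIFG disabled, the input-gate weights and bias become mandatory and are checked in addition to the basic parameters.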
746 TEST_CASE("CheckLstmLayerCifgDisabled")
747 {
748  LstmDescriptor descriptor;
749  descriptor.m_ActivationFunc = 3;
750  descriptor.m_ClippingThresProj = 0.5f;
751  descriptor.m_ClippingThresCell = 0.3f;
752  descriptor.m_CifgEnabled = false; // CIFG is disabled, so the optional CIFG parameters (input-gate weights and bias) must be provided
753 
754  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
755  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
756  ConstTensor inputToForgetWeights(
757  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
758  inputToForgetWeightsData);
759 
760  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
761  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
762  ConstTensor inputToCellWeights(
763  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
764  inputToCellWeightsData);
765 
766  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
767  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
768  ConstTensor inputToOutputWeights(
769  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
770  inputToOutputWeightsData);
771 
772  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
773  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
774  ConstTensor recurrentToForgetWeights(
775  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
776  recurrentToForgetWeightsData);
777 
778  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
779  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
780  ConstTensor recurrentToCellWeights(
781  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
782  recurrentToCellWeightsData);
783 
784  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
785  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
786  ConstTensor recurrentToOutputWeights(
787  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
788  recurrentToOutputWeightsData);
789 
790  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
791  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
792  ConstTensor forgetGateBias(
793  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
794  forgetGateBiasData);
795 
796  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
797  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
798  ConstTensor cellBias(
799  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
800  cellBiasData);
801 
802  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
803  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
804  ConstTensor outputGateBias(
805  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
806  outputGateBiasData);
807 
808  std::vector<float> inputToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
809  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
810  ConstTensor inputToInputWeights(
811  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
812  inputToInputWeightsData);
813 
814  std::vector<float> recurrentToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
815  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
816  ConstTensor recurrentToInputWeights(
817  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
818  recurrentToInputWeightsData);
819 
820  std::vector<float> inputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
821  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
822  ConstTensor inputGateBias(
823  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
824  inputGateBiasData);
825 
826  LstmInputParams params;
827  params.m_InputToForgetWeights = &inputToForgetWeights;
828  params.m_InputToCellWeights = &inputToCellWeights;
829  params.m_InputToOutputWeights = &inputToOutputWeights;
830  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
831  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
832  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
833  params.m_ForgetGateBias = &forgetGateBias;
834  params.m_CellBias = &cellBias;
835  params.m_OutputGateBias = &outputGateBias;
836 
837  params.m_InputToInputWeights = &inputToInputWeights;
838  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
839  params.m_InputGateBias = &inputGateBias;
840 
841  TestLstmLayerVisitor visitor(descriptor, params);
842 
843  NetworkImpl net;
844 
845  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
846  layer->ExecuteStrategy(visitor);
847 }
848 
849 TEST_CASE("CheckNamedLstmLayerCifgDisabled")
850 {
851  const char* layerName = "LstmLayer";
852  LstmDescriptor descriptor;
853  descriptor.m_ActivationFunc = 3;
854  descriptor.m_ClippingThresProj = 0.5f;
855  descriptor.m_ClippingThresCell = 0.3f;
856  descriptor.m_CifgEnabled = false; // CIFG is disabled, so the optional CIFG parameters (input-gate weights and bias) must be provided
857 
858  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
859  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
860  ConstTensor inputToForgetWeights(
861  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
862  inputToForgetWeightsData);
863 
864  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
865  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
866  ConstTensor inputToCellWeights(
867  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
868  inputToCellWeightsData);
869 
870  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
871  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
872  ConstTensor inputToOutputWeights(
873  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
874  inputToOutputWeightsData);
875 
876  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
877  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
878  ConstTensor recurrentToForgetWeights(
879  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
880  recurrentToForgetWeightsData);
881 
882  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
883  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
884  ConstTensor recurrentToCellWeights(
885  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
886  recurrentToCellWeightsData);
887 
888  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
889  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
890  ConstTensor recurrentToOutputWeights(
891  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
892  recurrentToOutputWeightsData);
893 
894  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
895  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
896  ConstTensor forgetGateBias(
897  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
898  forgetGateBiasData);
899 
900  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
901  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
902  ConstTensor cellBias(
903  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
904  cellBiasData);
905 
906  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
907  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
908  ConstTensor outputGateBias(
909  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
910  outputGateBiasData);
911 
912  std::vector<float> inputToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
913  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
914  ConstTensor inputToInputWeights(
915  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
916  inputToInputWeightsData);
917 
918  std::vector<float> recurrentToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
919  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
920  ConstTensor recurrentToInputWeights(
921  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
922  recurrentToInputWeightsData);
923 
924  std::vector<float> inputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
925  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
926  ConstTensor inputGateBias(
927  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
928  inputGateBiasData);
929 
930  LstmInputParams params;
931  params.m_InputToForgetWeights = &inputToForgetWeights;
932  params.m_InputToCellWeights = &inputToCellWeights;
933  params.m_InputToOutputWeights = &inputToOutputWeights;
934  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
935  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
936  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
937  params.m_ForgetGateBias = &forgetGateBias;
938  params.m_CellBias = &cellBias;
939  params.m_OutputGateBias = &outputGateBias;
940 
941  params.m_InputToInputWeights = &inputToInputWeights;
942  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
943  params.m_InputGateBias = &inputGateBias;
944 
945  TestLstmLayerVisitor visitor(descriptor, params, layerName);
946 
947  NetworkImpl net;
948 
949  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params, layerName);
950  layer->ExecuteStrategy(visitor);
951 }
952 
953 // Checks an LSTM layer with peephole connections enabled.
954 TEST_CASE("CheckLstmLayerPeephole")
955 {
956  LstmDescriptor descriptor;
957  descriptor.m_ActivationFunc = 3;
958  descriptor.m_ClippingThresProj = 0.5f;
959  descriptor.m_ClippingThresCell = 0.3f;
960  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
961  descriptor.m_PeepholeEnabled = true;
962 
963  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
964  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
965  ConstTensor inputToForgetWeights(
966  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
967  inputToForgetWeightsData);
968 
969  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
970  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
971  ConstTensor inputToCellWeights(
972  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
973  inputToCellWeightsData);
974 
975  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
976  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
977  ConstTensor inputToOutputWeights(
978  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
979  inputToOutputWeightsData);
980 
981  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
982  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
983  ConstTensor recurrentToForgetWeights(
984  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
985  recurrentToForgetWeightsData);
986 
987  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
988  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
989  ConstTensor recurrentToCellWeights(
990  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
991  recurrentToCellWeightsData);
992 
993  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
994  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
995  ConstTensor recurrentToOutputWeights(
996  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
997  recurrentToOutputWeightsData);
998 
999  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1000  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1001  ConstTensor forgetGateBias(
1002  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1003  forgetGateBiasData);
1004 
1005  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1006  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1007  ConstTensor cellBias(
1008  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1009  cellBiasData);
1010 
1011  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1012  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1013  ConstTensor outputGateBias(
1014  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1015  outputGateBiasData);
1016 
1017  std::vector<float> cellToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1018  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1019  ConstTensor cellToForgetWeights(
1020  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1021  cellToForgetWeightsData);
1022 
1023  std::vector<float> cellToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1024  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1025  ConstTensor cellToOutputWeights(
1026  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1027  cellToOutputWeightsData);
1028 
1029  LstmInputParams params;
1030  params.m_InputToForgetWeights = &inputToForgetWeights;
1031  params.m_InputToCellWeights = &inputToCellWeights;
1032  params.m_InputToOutputWeights = &inputToOutputWeights;
1033  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1034  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1035  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1036  params.m_ForgetGateBias = &forgetGateBias;
1037  params.m_CellBias = &cellBias;
1038  params.m_OutputGateBias = &outputGateBias;
1039 
1040  params.m_CellToForgetWeights = &cellToForgetWeights;
1041  params.m_CellToOutputWeights = &cellToOutputWeights;
1042 
1043  TestLstmLayerVisitor visitor(descriptor, params);
1044 
1045  NetworkImpl net;
1046 
1047  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
1048  layer->ExecuteStrategy(visitor);
1049 }
1050 
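// Peephole connections combined with CIFG disabled: the cell-to-input peephole weights apply only when the input gate exists (CIFG disabled).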
1051 TEST_CASE("CheckLstmLayerPeepholeCifgDisabled")
1052 {
1053  LstmDescriptor descriptor;
1054  descriptor.m_ActivationFunc = 3;
1055  descriptor.m_ClippingThresProj = 0.5f;
1056  descriptor.m_ClippingThresCell = 0.3f;
1057  descriptor.m_CifgEnabled = false;
1058  descriptor.m_PeepholeEnabled = true;
1059 
1060  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1061  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1062  ConstTensor inputToForgetWeights(
1063  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1064  inputToForgetWeightsData);
1065 
1066  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1067  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1068  ConstTensor inputToCellWeights(
1069  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1070  inputToCellWeightsData);
1071 
1072  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1073  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1074  ConstTensor inputToOutputWeights(
1075  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1076  inputToOutputWeightsData);
1077 
1078  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1079  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1080  ConstTensor recurrentToForgetWeights(
1081  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1082  recurrentToForgetWeightsData);
1083 
1084  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1085  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1086  ConstTensor recurrentToCellWeights(
1087  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1088  recurrentToCellWeightsData);
1089 
1090  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1091  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1092  ConstTensor recurrentToOutputWeights(
1093  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1094  recurrentToOutputWeightsData);
1095 
1096  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1097  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1098  ConstTensor forgetGateBias(
1099  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1100  forgetGateBiasData);
1101 
1102  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1103  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1104  ConstTensor cellBias(
1105  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1106  cellBiasData);
1107 
1108  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1109  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1110  ConstTensor outputGateBias(
1111  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1112  outputGateBiasData);
1113 
1114  std::vector<float> cellToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1115  std::vector<unsigned int> cellToInputWeightsDimensions = {1, 1, 3, 3};
1116  ConstTensor cellToInputWeights(
1117  TensorInfo(4, cellToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1118  cellToInputWeightsData);
1119 
1120  std::vector<float> cellToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1121  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1122  ConstTensor cellToForgetWeights(
1123  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1124  cellToForgetWeightsData);
1125 
1126  std::vector<float> cellToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1127  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1128  ConstTensor cellToOutputWeights(
1129  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1130  cellToOutputWeightsData);
1131 
1132  std::vector<float> inputToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1133  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
1134  ConstTensor inputToInputWeights(
1135  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1136  inputToInputWeightsData);
1137 
1138  std::vector<float> recurrentToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1139  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
1140  ConstTensor recurrentToInputWeights(
1141  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1142  recurrentToInputWeightsData);
1143 
1144  std::vector<float> inputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1145  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
1146  ConstTensor inputGateBias(
1147  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1148  inputGateBiasData);
1149 
1150  LstmInputParams params;
1151  // Basic params
1152  params.m_InputToForgetWeights = &inputToForgetWeights;
1153  params.m_InputToCellWeights = &inputToCellWeights;
1154  params.m_InputToOutputWeights = &inputToOutputWeights;
1155  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1156  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1157  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1158  params.m_ForgetGateBias = &forgetGateBias;
1159  params.m_CellBias = &cellBias;
1160  params.m_OutputGateBias = &outputGateBias;
1161 
1162  // Peephole params
1163  params.m_CellToInputWeights = &cellToInputWeights;
1164  params.m_CellToForgetWeights = &cellToForgetWeights;
1165  params.m_CellToOutputWeights = &cellToOutputWeights;
1166 
1167  // Cifg params
1168  params.m_InputToInputWeights = &inputToInputWeights;
1169  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
1170  params.m_InputGateBias = &inputGateBias;
1171 
1172  TestLstmLayerVisitor visitor(descriptor, params);
1173 
1174  NetworkImpl net;
1175 
1176  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
1177  layer->ExecuteStrategy(visitor);
1178 }
1179 
1180 TEST_CASE("CheckNamedLstmLayerPeephole")
1181 {
1182  const char* layerName = "LstmLayer";
1183  LstmDescriptor descriptor;
1184  descriptor.m_ActivationFunc = 3;
1185  descriptor.m_ClippingThresProj = 0.5f;
1186  descriptor.m_ClippingThresCell = 0.3f;
1187  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
1188  descriptor.m_PeepholeEnabled = true;
1189 
1190  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1191  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1192  ConstTensor inputToForgetWeights(
1193  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1194  inputToForgetWeightsData);
1195 
1196  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1197  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1198  ConstTensor inputToCellWeights(
1199  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1200  inputToCellWeightsData);
1201 
1202  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1203  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1204  ConstTensor inputToOutputWeights(
1205  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1206  inputToOutputWeightsData);
1207 
1208  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1209  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1210  ConstTensor recurrentToForgetWeights(
1211  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1212  recurrentToForgetWeightsData);
1213 
1214  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1215  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1216  ConstTensor recurrentToCellWeights(
1217  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1218  recurrentToCellWeightsData);
1219 
1220  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1221  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1222  ConstTensor recurrentToOutputWeights(
1223  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1224  recurrentToOutputWeightsData);
1225 
1226  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1227  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1228  ConstTensor forgetGateBias(
1229  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1230  forgetGateBiasData);
1231 
1232  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1233  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1234  ConstTensor cellBias(
1235  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1236  cellBiasData);
1237 
1238  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1239  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1240  ConstTensor outputGateBias(
1241  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1242  outputGateBiasData);
1243 
1244  std::vector<float> cellToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1245  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1246  ConstTensor cellToForgetWeights(
1247  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1248  cellToForgetWeightsData);
1249 
1250  std::vector<float> cellToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1251  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1252  ConstTensor cellToOutputWeights(
1253  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1254  cellToOutputWeightsData);
1255 
1256  LstmInputParams params;
1257  params.m_InputToForgetWeights = &inputToForgetWeights;
1258  params.m_InputToCellWeights = &inputToCellWeights;
1259  params.m_InputToOutputWeights = &inputToOutputWeights;
1260  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1261  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1262  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1263  params.m_ForgetGateBias = &forgetGateBias;
1264  params.m_CellBias = &cellBias;
1265  params.m_OutputGateBias = &outputGateBias;
1266 
1267  params.m_CellToForgetWeights = &cellToForgetWeights;
1268  params.m_CellToOutputWeights = &cellToOutputWeights;
1269 
1270  TestLstmLayerVisitor visitor(descriptor, params, layerName);
1271 
1272  NetworkImpl net;
1273 
1274  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params, layerName);
1275  layer->ExecuteStrategy(visitor);
1276 }
1277 
1278 // Checks an LSTM layer with the projection layer enabled.
1279 TEST_CASE("CheckLstmLayerProjection")
1280 {
1281  LstmDescriptor descriptor;
1282  descriptor.m_ActivationFunc = 3;
1283  descriptor.m_ClippingThresProj = 0.5f;
1284  descriptor.m_ClippingThresCell = 0.3f;
1285  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
1286  descriptor.m_ProjectionEnabled = true;
1287 
1288  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1289  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1290  ConstTensor inputToForgetWeights(
1291  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1292  inputToForgetWeightsData);
1293 
1294  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1295  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1296  ConstTensor inputToCellWeights(
1297  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1298  inputToCellWeightsData);
1299 
1300  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1301  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1302  ConstTensor inputToOutputWeights(
1303  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1304  inputToOutputWeightsData);
1305 
1306  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1307  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1308  ConstTensor recurrentToForgetWeights(
1309  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1310  recurrentToForgetWeightsData);
1311 
1312  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1313  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1314  ConstTensor recurrentToCellWeights(
1315  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1316  recurrentToCellWeightsData);
1317 
1318  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1319  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1320  ConstTensor recurrentToOutputWeights(
1321  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1322  recurrentToOutputWeightsData);
1323 
1324  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1325  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1326  ConstTensor forgetGateBias(
1327  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1328  forgetGateBiasData);
1329 
1330  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1331  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1332  ConstTensor cellBias(
1333  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1334  cellBiasData);
1335 
1336  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1337  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1338  ConstTensor outputGateBias(
1339  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1340  outputGateBiasData);
1341 
1342  std::vector<float> projectionBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1343  std::vector<unsigned int> projectionBiasDimensions = {1, 1, 3, 3};
1344  ConstTensor projectionBias(
1345  TensorInfo(4, projectionBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1346  projectionBiasData);
1347 
1348  std::vector<float> projectionWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1349  std::vector<unsigned int> projectionWeightsDimensions = {1, 1, 3, 3};
1350  ConstTensor projectionWeights(
1351  TensorInfo(4, projectionWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1352  projectionWeightsData);
1353 
1354  LstmInputParams params;
1355  params.m_InputToForgetWeights = &inputToForgetWeights;
1356  params.m_InputToCellWeights = &inputToCellWeights;
1357  params.m_InputToOutputWeights = &inputToOutputWeights;
1358  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1359  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1360  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1361  params.m_ForgetGateBias = &forgetGateBias;
1362  params.m_CellBias = &cellBias;
1363  params.m_OutputGateBias = &outputGateBias;
1364 
1365  params.m_ProjectionWeights = &projectionWeights;
1366  params.m_ProjectionBias = &projectionBias;
1367 
1368  TestLstmLayerVisitor visitor(descriptor, params);
1369 
1370  NetworkImpl net;
1371 
1372  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
1373  layer->ExecuteStrategy(visitor);
1374 }
1375 
1376 TEST_CASE("CheckNamedLstmLayerProjection")
1377 {
1378  const char* layerName = "LstmLayer";
1379  LstmDescriptor descriptor;
1380  descriptor.m_ActivationFunc = 3;
1381  descriptor.m_ClippingThresProj = 0.5f;
1382  descriptor.m_ClippingThresCell = 0.3f;
1383  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
1384  descriptor.m_ProjectionEnabled = true;
1385 
1386  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1387  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1388  ConstTensor inputToForgetWeights(
1389  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1390  inputToForgetWeightsData);
1391 
1392  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1393  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1394  ConstTensor inputToCellWeights(
1395  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1396  inputToCellWeightsData);
1397 
1398  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1399  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1400  ConstTensor inputToOutputWeights(
1401  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1402  inputToOutputWeightsData);
1403 
1404  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1405  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1406  ConstTensor recurrentToForgetWeights(
1407  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1408  recurrentToForgetWeightsData);
1409 
1410  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1411  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1412  ConstTensor recurrentToCellWeights(
1413  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1414  recurrentToCellWeightsData);
1415 
1416  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1417  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1418  ConstTensor recurrentToOutputWeights(
1419  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1420  recurrentToOutputWeightsData);
1421 
1422  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1423  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1424  ConstTensor forgetGateBias(
1425  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1426  forgetGateBiasData);
1427 
1428  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1429  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1430  ConstTensor cellBias(
1431  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1432  cellBiasData);
1433 
1434  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1435  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1436  ConstTensor outputGateBias(
1437  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1438  outputGateBiasData);
1439 
1440  std::vector<float> projectionBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1441  std::vector<unsigned int> projectionBiasDimensions = {1, 1, 3, 3};
1442  ConstTensor projectionBias(
1443  TensorInfo(4, projectionBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1444  projectionBiasData);
1445 
1446  std::vector<float> projectionWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1447  std::vector<unsigned int> projectionWeightsDimensions = {1, 1, 3, 3};
1448  ConstTensor projectionWeights(
1449  TensorInfo(4, projectionWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1450  projectionWeightsData);
1451 
1452  LstmInputParams params;
1453  params.m_InputToForgetWeights = &inputToForgetWeights;
1454  params.m_InputToCellWeights = &inputToCellWeights;
1455  params.m_InputToOutputWeights = &inputToOutputWeights;
1456  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1457  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1458  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1459  params.m_ForgetGateBias = &forgetGateBias;
1460  params.m_CellBias = &cellBias;
1461  params.m_OutputGateBias = &outputGateBias;
1462 
1463  params.m_ProjectionWeights = &projectionWeights;
1464  params.m_ProjectionBias = &projectionBias;
1465 
1466  TestLstmLayerVisitor visitor(descriptor, params, layerName);
1467 
1468  NetworkImpl net;
1469 
1470  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params, layerName);
1471  layer->ExecuteStrategy(visitor);
1472 }
1473 
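// The QLstm variants use quantized weights (QSymmS8) and 32-bit integer biases (Signed32) in place of the Float32 tensors used above.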
1474 TEST_CASE("CheckQLstmLayerBasic")
1475 {
1476  QLstmDescriptor descriptor;
1477  descriptor.m_ProjectionClip = 0.5f;
1478  descriptor.m_CellClip = 0.3f;
1479  descriptor.m_CifgEnabled = true;
1480 
1481  // Basic params ONLY
1482  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1483  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1484  ConstTensor inputToForgetWeights(
1485  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1486  inputToForgetWeightsData);
1487 
1488  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1489  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1490  ConstTensor inputToCellWeights(
1491  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1492  inputToCellWeightsData);
1493 
1494  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1495  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1496  ConstTensor inputToOutputWeights(
1497  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1498  inputToOutputWeightsData);
1499 
1500  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1501  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1502  ConstTensor recurrentToForgetWeights(
1503  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1504  recurrentToForgetWeightsData);
1505 
1506  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1507  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1508  ConstTensor recurrentToCellWeights(
1509  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1510  recurrentToCellWeightsData);
1511 
1512  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1513  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1514  ConstTensor recurrentToOutputWeights(
1515  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1516  recurrentToOutputWeightsData);
1517 
1518  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1519  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1520  ConstTensor forgetGateBias(
1521  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1522  forgetGateBiasData);
1523 
1524  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1525  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1526  ConstTensor cellBias(
1527  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1528  cellBiasData);
1529 
1530  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1531  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1532  ConstTensor outputGateBias(
1533  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1534  outputGateBiasData);
1535 
1536  LstmInputParams params;
1537  params.m_InputToForgetWeights = &inputToForgetWeights;
1538  params.m_InputToCellWeights = &inputToCellWeights;
1539  params.m_InputToOutputWeights = &inputToOutputWeights;
1540  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1541  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1542  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1543  params.m_ForgetGateBias = &forgetGateBias;
1544  params.m_CellBias = &cellBias;
1545  params.m_OutputGateBias = &outputGateBias;
1546 
1547  TestQLstmLayerVisitor visitor(descriptor, params);
1548 
1549  NetworkImpl net;
1550 
1551  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
1552  layer->ExecuteStrategy(visitor);
1553 }
1554 
1555 TEST_CASE("CheckNamedQLstmLayerBasic")
1556 {
1557  const char* layerName = "QLstmLayer";
1558  QLstmDescriptor descriptor;
1559  descriptor.m_ProjectionClip = 0.5f;
1560  descriptor.m_CellClip = 0.3f;
1561  descriptor.m_CifgEnabled = true;
1562 
1563  // Basic params ONLY
1564  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1565  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1566  ConstTensor inputToForgetWeights(
1567  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1568  inputToForgetWeightsData);
1569 
1570  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1571  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1572  ConstTensor inputToCellWeights(
1573  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1574  inputToCellWeightsData);
1575 
1576  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1577  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1578  ConstTensor inputToOutputWeights(
1579  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1580  inputToOutputWeightsData);
1581 
1582  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1583  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1584  ConstTensor recurrentToForgetWeights(
1585  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1586  recurrentToForgetWeightsData);
1587 
1588  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1589  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1590  ConstTensor recurrentToCellWeights(
1591  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1592  recurrentToCellWeightsData);
1593 
1594  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1595  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1596  ConstTensor recurrentToOutputWeights(
1597  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1598  recurrentToOutputWeightsData);
1599 
1600  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1601  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1602  ConstTensor forgetGateBias(
1603  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1604  forgetGateBiasData);
1605 
1606  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1607  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1608  ConstTensor cellBias(
1609  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1610  cellBiasData);
1611 
1612  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1613  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1614  ConstTensor outputGateBias(
1615  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1616  outputGateBiasData);
1617 
1618  LstmInputParams params;
1619  params.m_InputToForgetWeights = &inputToForgetWeights;
1620  params.m_InputToCellWeights = &inputToCellWeights;
1621  params.m_InputToOutputWeights = &inputToOutputWeights;
1622  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1623  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1624  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1625  params.m_ForgetGateBias = &forgetGateBias;
1626  params.m_CellBias = &cellBias;
1627  params.m_OutputGateBias = &outputGateBias;
1628 
1629  TestQLstmLayerVisitor visitor(descriptor, params, layerName);
1630 
1631  NetworkImpl net;
1632 
1633  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params, layerName);
1634  layer->ExecuteStrategy(visitor);
1635 }
1636 
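// As with the float LSTM, disabling CIFG makes the input-gate weights and bias mandatory.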
1637 TEST_CASE("CheckQLstmLayerCifgDisabled")
1638 {
1639  QLstmDescriptor descriptor;
1640  descriptor.m_ProjectionClip = 0.5f;
1641  descriptor.m_CellClip = 0.3f;
1642  descriptor.m_CifgEnabled = false;
1643 
1644  // Basic params
1645  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1646  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1647  ConstTensor inputToForgetWeights(
1648  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1649  inputToForgetWeightsData);
1650 
1651  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1652  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1653  ConstTensor inputToCellWeights(
1654  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1655  inputToCellWeightsData);
1656 
1657  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1658  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1659  ConstTensor inputToOutputWeights(
1660  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1661  inputToOutputWeightsData);
1662 
1663  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1664  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1665  ConstTensor recurrentToForgetWeights(
1666  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1667  recurrentToForgetWeightsData);
1668 
1669  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1670  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1671  ConstTensor recurrentToCellWeights(
1672  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1673  recurrentToCellWeightsData);
1674 
1675  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1676  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1677  ConstTensor recurrentToOutputWeights(
1678  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1679  recurrentToOutputWeightsData);
1680 
1681  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1682  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1683  ConstTensor forgetGateBias(
1684  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1685  forgetGateBiasData);
1686 
1687  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1688  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1689  ConstTensor cellBias(
1690  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1691  cellBiasData);
1692 
1693  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1694  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1695  ConstTensor outputGateBias(
1696  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1697  outputGateBiasData);
1698 
1699  // CIFG disabled params
1700  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1701  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
1702  ConstTensor inputToInputWeights(
1703  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1704  inputToInputWeightsData);
1705 
1706  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1707  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
1708  ConstTensor recurrentToInputWeights(
1709  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1710  recurrentToInputWeightsData);
1711 
1712  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1713  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
1714  ConstTensor inputGateBias(
1715  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1716  inputGateBiasData);
1717 
1718  LstmInputParams params;
1719 
1720  // Basic params
1721  params.m_InputToForgetWeights = &inputToForgetWeights;
1722  params.m_InputToCellWeights = &inputToCellWeights;
1723  params.m_InputToOutputWeights = &inputToOutputWeights;
1724  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1725  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1726  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1727  params.m_ForgetGateBias = &forgetGateBias;
1728  params.m_CellBias = &cellBias;
1729  params.m_OutputGateBias = &outputGateBias;
1730 
1731  // CIFG disabled params
1732  params.m_InputToInputWeights = &inputToInputWeights;
1733  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
1734  params.m_InputGateBias = &inputGateBias;
1735 
1736  TestQLstmLayerVisitor visitor(descriptor, params);
1737 
1738  NetworkImpl net;
1739 
1740  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
1741  layer->ExecuteStrategy(visitor);
1742 }
1743 
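// Combines the CIFG-disabled parameters with peephole connections on the quantized LSTM layer.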
1744 TEST_CASE("CheckQLstmLayerCifgDisabledPeepholeEnabled")
1745 {
1746  QLstmDescriptor descriptor;
1747  descriptor.m_ProjectionClip = 0.5f;
1748  descriptor.m_CellClip = 0.3f;
1749  descriptor.m_CifgEnabled = false;
1750  descriptor.m_PeepholeEnabled = true;
1751 
1752  // Basic params
1753  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1754  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1755  ConstTensor inputToForgetWeights(
1756  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1757  inputToForgetWeightsData);
1758 
1759  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1760  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1761  ConstTensor inputToCellWeights(
1762  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1763  inputToCellWeightsData);
1764 
1765  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1766  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1767  ConstTensor inputToOutputWeights(
1768  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1769  inputToOutputWeightsData);
1770 
1771  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1772  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1773  ConstTensor recurrentToForgetWeights(
1774  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1775  recurrentToForgetWeightsData);
1776 
1777  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1778  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1779  ConstTensor recurrentToCellWeights(
1780  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1781  recurrentToCellWeightsData);
1782 
1783  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1784  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1785  ConstTensor recurrentToOutputWeights(
1786  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1787  recurrentToOutputWeightsData);
1788 
1789  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1790  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1791  ConstTensor forgetGateBias(
1792  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1793  forgetGateBiasData);
1794 
1795  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1796  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1797  ConstTensor cellBias(
1798  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1799  cellBiasData);
1800 
1801  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1802  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1803  ConstTensor outputGateBias(
1804  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1805  outputGateBiasData);
1806 
1807  // CIFG disabled params
1808  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1809  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
1810  ConstTensor inputToInputWeights(
1811  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1812  inputToInputWeightsData);
1813 
1814  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1815  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
1816  ConstTensor recurrentToInputWeights(
1817  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1818  recurrentToInputWeightsData);
1819 
1820  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1821  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
1822  ConstTensor inputGateBias(
1823  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1824  inputGateBiasData);
1825 
1826  // Peephole enabled, CIFG disabled params
1827  std::vector<int16_t> cellToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1828  std::vector<unsigned int> cellToInputWeightsDimensions = {1, 1, 3, 3};
1829  ConstTensor cellToInputWeights(
1830  TensorInfo(4, cellToInputWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1831  cellToInputWeightsData);
1832 
1833  std::vector<int16_t> cellToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1834  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1835  ConstTensor cellToForgetWeights(
1836  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1837  cellToForgetWeightsData);
1838 
1839  std::vector<int16_t> cellToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1840  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1841  ConstTensor cellToOutputWeights(
1842  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1843  cellToOutputWeightsData);
1844 
1845  LstmInputParams params;
1846 
1847  // Basic params
1848  params.m_InputToForgetWeights = &inputToForgetWeights;
1849  params.m_InputToCellWeights = &inputToCellWeights;
1850  params.m_InputToOutputWeights = &inputToOutputWeights;
1851  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1852  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1853  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1854  params.m_ForgetGateBias = &forgetGateBias;
1855  params.m_CellBias = &cellBias;
1856  params.m_OutputGateBias = &outputGateBias;
1857 
1858  // CIFG disabled params
1859  params.m_InputToInputWeights = &inputToInputWeights;
1860  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
1861  params.m_InputGateBias = &inputGateBias;
1862 
1863  // Peephole enabled, CIFG disabled params
1864  params.m_CellToInputWeights = &cellToInputWeights;
1865  params.m_CellToForgetWeights = &cellToForgetWeights;
1866  params.m_CellToOutputWeights = &cellToOutputWeights;
1867 
1868  TestQLstmLayerVisitor visitor(descriptor, params);
1869 
1870  NetworkImpl net;
1871 
1872  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
1873  layer->ExecuteStrategy(visitor);
1874 }
1875 
1876 TEST_CASE("CheckQLstmLayerCifgEnabledPeepholeEnabled")
1877 {
1878  QLstmDescriptor descriptor;
1879  descriptor.m_ProjectionClip = 0.5f;
1880  descriptor.m_CellClip = 0.3f;
1881  descriptor.m_CifgEnabled = true;
1882  descriptor.m_PeepholeEnabled = true;
1883 
1884  // Basic params
1885  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1886  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1887  ConstTensor inputToForgetWeights(
1888  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1889  inputToForgetWeightsData);
1890 
1891  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1892  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1893  ConstTensor inputToCellWeights(
1894  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1895  inputToCellWeightsData);
1896 
1897  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1898  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1899  ConstTensor inputToOutputWeights(
1900  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1901  inputToOutputWeightsData);
1902 
1903  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1904  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1905  ConstTensor recurrentToForgetWeights(
1906  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1907  recurrentToForgetWeightsData);
1908 
1909  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1910  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1911  ConstTensor recurrentToCellWeights(
1912  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1913  recurrentToCellWeightsData);
1914 
1915  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1916  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1917  ConstTensor recurrentToOutputWeights(
1918  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1919  recurrentToOutputWeightsData);
1920 
1921  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1922  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1923  ConstTensor forgetGateBias(
1924  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1925  forgetGateBiasData);
1926 
1927  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1928  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1929  ConstTensor cellBias(
1930  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1931  cellBiasData);
1932 
1933  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1934  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1935  ConstTensor outputGateBias(
1936  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1937  outputGateBiasData);
1938 
1939  // Peephole enabled and CIFG enabled params
1940  std::vector<int16_t> cellToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1941  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1942  ConstTensor cellToForgetWeights(
1943  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1944  cellToForgetWeightsData);
1945 
1946  std::vector<int16_t> cellToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1947  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1948  ConstTensor cellToOutputWeights(
1949  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1950  cellToOutputWeightsData);
1951 
1952  LstmInputParams params;
1953 
1954  // Basic params
1955  params.m_InputToForgetWeights = &inputToForgetWeights;
1956  params.m_InputToCellWeights = &inputToCellWeights;
1957  params.m_InputToOutputWeights = &inputToOutputWeights;
1958  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1959  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1960  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1961  params.m_ForgetGateBias = &forgetGateBias;
1962  params.m_CellBias = &cellBias;
1963  params.m_OutputGateBias = &outputGateBias;
1964 
1965  // Peephole enabled and CIFG enabled params
1966  params.m_CellToForgetWeights = &cellToForgetWeights;
1967  params.m_CellToOutputWeights = &cellToOutputWeights;
1968 
1969  TestQLstmLayerVisitor visitor(descriptor, params);
1970 
1971  NetworkImpl net;
1972 
1973  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
1974  layer->ExecuteStrategy(visitor);
1975 }
1976 
1977 TEST_CASE("CheckQLstmLayerProjectionEnabled")
1978 {
1979  QLstmDescriptor descriptor;
1980  descriptor.m_ProjectionClip = 0.5f;
1981  descriptor.m_CellClip = 0.3f;
1982  descriptor.m_CifgEnabled = true;
1983  descriptor.m_ProjectionEnabled = true;
1984 
1985  // Basic params ONLY
1986  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1987  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1988  ConstTensor inputToForgetWeights(
1989  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1990  inputToForgetWeightsData);
1991 
1992  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1993  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1994  ConstTensor inputToCellWeights(
1995  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1996  inputToCellWeightsData);
1997 
1998  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1999  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
2000  ConstTensor inputToOutputWeights(
2001  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2002  inputToOutputWeightsData);
2003 
2004  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2005  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
2006  ConstTensor recurrentToForgetWeights(
2007  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2008  recurrentToForgetWeightsData);
2009 
2010  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2011  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
2012  ConstTensor recurrentToCellWeights(
2013  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2014  recurrentToCellWeightsData);
2015 
2016  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2017  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
2018  ConstTensor recurrentToOutputWeights(
2019  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2020  recurrentToOutputWeightsData);
2021 
2022  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2023  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
2024  ConstTensor forgetGateBias(
2025  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2026  forgetGateBiasData);
2027 
2028  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2029  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
2030  ConstTensor cellBias(
2031  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2032  cellBiasData);
2033 
2034  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2035  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
2036  ConstTensor outputGateBias(
2037  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2038  outputGateBiasData);
2039 
2040  // Projection enabled params
2041  std::vector<uint8_t> projectionWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2042  std::vector<unsigned int> projectionWeightsDimensions = {1, 1, 3, 3};
2043  ConstTensor projectionWeights(
2044  TensorInfo(4, projectionWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2045  projectionWeightsData);
2046 
2047  std::vector<int32_t> projectionBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2048  std::vector<unsigned int> projectionBiasDimensions = {1, 1, 3, 3};
2049  ConstTensor projectionBias(
2050  TensorInfo(4, projectionBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2051  projectionBiasData);
2052 
2053  LstmInputParams params;
2054 
2055  // Basic params
2056  params.m_InputToForgetWeights = &inputToForgetWeights;
2057  params.m_InputToCellWeights = &inputToCellWeights;
2058  params.m_InputToOutputWeights = &inputToOutputWeights;
2059  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
2060  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
2061  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
2062  params.m_ForgetGateBias = &forgetGateBias;
2063  params.m_CellBias = &cellBias;
2064  params.m_OutputGateBias = &outputGateBias;
2065 
2066  // Projection enabled params
2067  params.m_ProjectionWeights = &projectionWeights;
2068  params.m_ProjectionBias = &projectionBias;
2069 
2070  TestQLstmLayerVisitor visitor(descriptor, params);
2071 
2072  NetworkImpl net;
2073 
2074  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
2075  layer->ExecuteStrategy(visitor);
2076 }
2077 
2078 TEST_CASE("CheckQLstmLayerCifgDisabledLayerNormEnabled")
2079 {
2080  QLstmDescriptor descriptor;
2081  descriptor.m_ProjectionClip = 0.5f;
2082  descriptor.m_CellClip = 0.3f;
2083  descriptor.m_CifgEnabled = false;
2084  descriptor.m_LayerNormEnabled = true;
2085 
2086  // Basic params
2087  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2088  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
2089  ConstTensor inputToForgetWeights(
2090  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2091  inputToForgetWeightsData);
2092 
2093  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2094  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
2095  ConstTensor inputToCellWeights(
2096  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2097  inputToCellWeightsData);
2098 
2099  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2100  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
2101  ConstTensor inputToOutputWeights(
2102  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2103  inputToOutputWeightsData);
2104 
2105  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2106  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
2107  ConstTensor recurrentToForgetWeights(
2108  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2109  recurrentToForgetWeightsData);
2110 
2111  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2112  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
2113  ConstTensor recurrentToCellWeights(
2114  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2115  recurrentToCellWeightsData);
2116 
2117  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2118  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
2119  ConstTensor recurrentToOutputWeights(
2120  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2121  recurrentToOutputWeightsData);
2122 
2123  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2124  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
2125  ConstTensor forgetGateBias(
2126  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2127  forgetGateBiasData);
2128 
2129  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2130  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
2131  ConstTensor cellBias(
2132  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2133  cellBiasData);
2134 
2135  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2136  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
2137  ConstTensor outputGateBias(
2138  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2139  outputGateBiasData);
2140 
2141  // CIFG disabled params
2142  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2143  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
2144  ConstTensor inputToInputWeights(
2145  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2146  inputToInputWeightsData);
2147 
2148  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2149  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
2150  ConstTensor recurrentToInputWeights(
2151  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2152  recurrentToInputWeightsData);
2153 
2154  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2155  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
2156  ConstTensor inputGateBias(
2157  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2158  inputGateBiasData);
2159 
2160  // Layer Norm enabled, CIFG disabled params
2161  std::vector<int16_t> inputLayerNormWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2162  std::vector<unsigned int> inputLayerNormWeightsDimensions = {1, 1, 3, 3};
2163  ConstTensor inputLayerNormWeights(
2164  TensorInfo(4, inputLayerNormWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
2165  inputLayerNormWeightsData);
2166 
2167  std::vector<int16_t> forgetLayerNormWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2168  std::vector<unsigned int> forgetLayerNormWeightsDimensions = {1, 1, 3, 3};
2169  ConstTensor forgetLayerNormWeights(
2170  TensorInfo(4, forgetLayerNormWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
2171  forgetLayerNormWeightsData);
2172 
2173  std::vector<int16_t> cellLayerNormWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2174  std::vector<unsigned int> cellLayerNormWeightsDimensions = {1, 1, 3, 3};
2175  ConstTensor cellLayerNormWeights(
2176  TensorInfo(4, cellLayerNormWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
2177  cellLayerNormWeightsData);
2178 
2179  std::vector<int16_t> outputLayerNormWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2180  std::vector<unsigned int> outputLayerNormWeightsDimensions = {1, 1, 3, 3};
2181  ConstTensor outputLayerNormWeights(
2182  TensorInfo(4, outputLayerNormWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
2183  outputLayerNormWeightsData);
2184 
2185  LstmInputParams params;
2186 
2187  // Basic params
2188  params.m_InputToForgetWeights = &inputToForgetWeights;
2189  params.m_InputToCellWeights = &inputToCellWeights;
2190  params.m_InputToOutputWeights = &inputToOutputWeights;
2191  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
2192  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
2193  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
2194  params.m_ForgetGateBias = &forgetGateBias;
2195  params.m_CellBias = &cellBias;
2196  params.m_OutputGateBias = &outputGateBias;
2197 
2198  // CIFG disabled params
2199  params.m_InputToInputWeights = &inputToInputWeights;
2200  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
2201  params.m_InputGateBias = &inputGateBias;
2202 
2203  // Layer Norm enabled, CIFG disabled params
2204  params.m_InputLayerNormWeights = &inputLayerNormWeights;
2205  params.m_ForgetLayerNormWeights = &forgetLayerNormWeights;
2206  params.m_CellLayerNormWeights = &cellLayerNormWeights;
2207  params.m_OutputLayerNormWeights = &outputLayerNormWeights;
2208 
2209  TestQLstmLayerVisitor visitor(descriptor, params);
2210 
2211  NetworkImpl net;
2212 
2213  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
2214  layer->ExecuteStrategy(visitor);
2215 }
2216 
2217 
2218 TEST_CASE("CheckQuantizedLstmLayer")
2219 {
2220  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2221  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
2222  ConstTensor inputToInputWeights(
2223  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2224  inputToInputWeightsData);
2225 
2226  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2227  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
2228  ConstTensor inputToForgetWeights(
2229  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2230  inputToForgetWeightsData);
2231 
2232  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2233  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
2234  ConstTensor inputToCellWeights(
2235  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2236  inputToCellWeightsData);
2237 
2238  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2239  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
2240  ConstTensor inputToOutputWeights(
2241  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2242  inputToOutputWeightsData);
2243 
2244 
2245  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2246  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
2247  ConstTensor recurrentToInputWeights(
2248  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2249  recurrentToInputWeightsData);
2250 
2251  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2252  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
2253  ConstTensor recurrentToForgetWeights(
2254  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2255  recurrentToForgetWeightsData);
2256 
2257  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2258  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
2259  ConstTensor recurrentToCellWeights(
2260  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2261  recurrentToCellWeightsData);
2262 
2263  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2264  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
2265  ConstTensor recurrentToOutputWeights(
2266  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2267  recurrentToOutputWeightsData);
2268 
2269 
2270  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2271  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
2272  ConstTensor inputGateBias(
2273  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2274  inputGateBiasData);
2275 
2276  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2277  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
2278  ConstTensor forgetGateBias(
2279  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2280  forgetGateBiasData);
2281 
2282  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2283  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
2284  ConstTensor cellBias(
2285  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2286  cellBiasData);
2287 
2288  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2289  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
2290  ConstTensor outputGateBias(
2291  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2292  outputGateBiasData);
2293 
2294  QuantizedLstmInputParams params;
2295 
2296  params.m_InputToInputWeights = &inputToInputWeights;
2297  params.m_InputToForgetWeights = &inputToForgetWeights;
2298  params.m_InputToCellWeights = &inputToCellWeights;
2299  params.m_InputToOutputWeights = &inputToOutputWeights;
2300 
2301  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
2302  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
2303  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
2304  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
2305 
2306  params.m_InputGateBias = &inputGateBias;
2307  params.m_ForgetGateBias = &forgetGateBias;
2308  params.m_CellBias = &cellBias;
2309  params.m_OutputGateBias = &outputGateBias;
2310 
2311  TestQuantizedLstmLayerVisitor visitor(params);
2312 
2313  NetworkImpl net;
2314 
2315  IConnectableLayer* const layer = net.AddQuantizedLstmLayer(params);
2316  layer->ExecuteStrategy(visitor);
2317 }
2318 
2319 TEST_CASE("CheckNamedQuantizedLstmLayer")
2320 {
2321  const char* layerName = "LstmLayer";
2322  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2323  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
2324  ConstTensor inputToInputWeights(
2325  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2326  inputToInputWeightsData);
2327 
2328  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2329  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
2330  ConstTensor inputToForgetWeights(
2331  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2332  inputToForgetWeightsData);
2333 
2334  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2335  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
2336  ConstTensor inputToCellWeights(
2337  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2338  inputToCellWeightsData);
2339 
2340  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2341  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
2342  ConstTensor inputToOutputWeights(
2343  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2344  inputToOutputWeightsData);
2345 
2346 
2347  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2348  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
2349  ConstTensor recurrentToInputWeights(
2350  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2351  recurrentToInputWeightsData);
2352 
2353  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2354  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
2355  ConstTensor recurrentToForgetWeights(
2356  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2357  recurrentToForgetWeightsData);
2358 
2359  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2360  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
2361  ConstTensor recurrentToCellWeights(
2362  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2363  recurrentToCellWeightsData);
2364 
2365  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2366  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
2367  ConstTensor recurrentToOutputWeights(
2368  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2369  recurrentToOutputWeightsData);
2370 
2371 
2372  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2373  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
2374  ConstTensor inputGateBias(
2375  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2376  inputGateBiasData);
2377 
2378  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2379  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
2380  ConstTensor forgetGateBias(
2381  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2382  forgetGateBiasData);
2383 
2384  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2385  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
2386  ConstTensor cellBias(
2387  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2388  cellBiasData);
2389 
2390  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2391  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
2392  ConstTensor outputGateBias(
2393  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2394  outputGateBiasData);
2395 
2396  QuantizedLstmInputParams params;
2397 
2398  params.m_InputToInputWeights = &inputToInputWeights;
2399  params.m_InputToForgetWeights = &inputToForgetWeights;
2400  params.m_InputToCellWeights = &inputToCellWeights;
2401  params.m_InputToOutputWeights = &inputToOutputWeights;
2402 
2403  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
2404  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
2405  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
2406  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
2407 
2408  params.m_InputGateBias = &inputGateBias;
2409  params.m_ForgetGateBias = &forgetGateBias;
2410  params.m_CellBias = &cellBias;
2411  params.m_OutputGateBias = &outputGateBias;
2412 
2413  TestQuantizedLstmLayerVisitor visitor(params, layerName);
2414 
2415  NetworkImpl net;
2416 
2417  IConnectableLayer* const layer = net.AddQuantizedLstmLayer(params, layerName);
2418  layer->ExecuteStrategy(visitor);
2419 }
2420 
2421 }
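The test cases above cover each combination of QLstmDescriptor flags. As a quick reference, here is a hedged summary sketch (not ArmNN API documentation, simply the LstmInputParams groups that the tests above populate for each flag):

// Sketch: parameter groups set by the QLstm tests above for each QLstmDescriptor flag.
// Always set (basic params): m_InputToForgetWeights, m_InputToCellWeights, m_InputToOutputWeights,
//   m_RecurrentToForgetWeights, m_RecurrentToCellWeights, m_RecurrentToOutputWeights,
//   m_ForgetGateBias, m_CellBias, m_OutputGateBias.
// m_CifgEnabled == false also sets: m_InputToInputWeights, m_RecurrentToInputWeights, m_InputGateBias.
// m_PeepholeEnabled == true also sets: m_CellToForgetWeights, m_CellToOutputWeights
//   (plus m_CellToInputWeights when CIFG is disabled).
// m_ProjectionEnabled == true also sets: m_ProjectionWeights and m_ProjectionBias.
// m_LayerNormEnabled == true also sets: m_ForgetLayerNormWeights, m_CellLayerNormWeights,
//   m_OutputLayerNormWeights (plus m_InputLayerNormWeights when CIFG is disabled).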

◆ TopKSort()

void TopKSort ( unsigned int  k,
unsigned int *  indices,
const float *  values,
unsigned int  numElement 
)

Definition at line 24 of file DetectionPostProcess.cpp.

Referenced by DetectionPostProcess(), NonMaxSuppression(), and TEST_SUITE().

25 {
26  std::partial_sort(indices, indices + k, indices + numElement,
27  [&values](unsigned int i, unsigned int j) { return values[i] > values[j]; });
28 }
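A minimal usage sketch based on the signature above, assuming the header that declares armnn::TopKSort is on the include path (the include below is illustrative). It partially sorts an index array so that the first k entries refer to the largest values:

#include <iostream>
#include <numeric>
#include <vector>
#include "DetectionPostProcess.hpp" // illustrative include; declares armnn::TopKSort

int main()
{
    std::vector<float> scores = { 0.1f, 0.9f, 0.4f, 0.7f };      // hypothetical scores
    std::vector<unsigned int> indices(scores.size());
    std::iota(indices.begin(), indices.end(), 0u);               // 0, 1, 2, 3

    const unsigned int k = 2u;
    armnn::TopKSort(k, indices.data(), scores.data(), static_cast<unsigned int>(indices.size()));

    // indices[0..k) now reference the largest scores (index 1 then index 3 for this data).
    for (unsigned int i = 0u; i < k; ++i)
    {
        std::cout << indices[i] << " -> " << scores[indices[i]] << "\n";
    }
    return 0;
}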

◆ TransposeConvolution2dImpl()

void TransposeConvolution2dImpl ( const TransposeConvolution2dDescriptor &  descriptor,
const TensorShape &  inputShape,
Decoder< float > &  inputDecoder,
const TensorShape &  outputShape,
Encoder< float > &  outputEncoder,
const TensorShape &  weightsShape,
Decoder< float > &  weightsDecoder,
Decoder< float > *  biasesDecoder 
)

Definition at line 15 of file TransposeConvolution2d.cpp.

References Decoder< IType >::DecodeTensor(), Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorShape::GetNumElements(), DataLayoutIndexed::GetWidthIndex(), TransposeConvolution2dDescriptor::m_BiasEnabled, TransposeConvolution2dDescriptor::m_DataLayout, TransposeConvolution2dDescriptor::m_PadLeft, TransposeConvolution2dDescriptor::m_PadTop, TransposeConvolution2dDescriptor::m_StrideX, TransposeConvolution2dDescriptor::m_StrideY, NHWC, and Encoder< IType >::Set().

Referenced by RefTransposeConvolution2dWorkload::ExecuteAsync().

23 {
24  if (descriptor.m_BiasEnabled && !biasesDecoder)
25  {
26  throw InvalidArgumentException("Biases enabled but no bias data provided");
27  }
28  const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
29  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
30  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
31  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
32 
33  const unsigned int numBatches = inputShape[0];
34 
35  const unsigned int inputWidth = inputShape[widthIndex];
36  const unsigned int inputHeight = inputShape[heightIndex];
37  const unsigned int inputDepth = inputShape[channelsIndex];
38 
39  const unsigned int weightsHeight = weightsShape[heightIndex];
40  const unsigned int weightsWidth = weightsShape[widthIndex];
41  const unsigned int weightsDepth = weightsShape[channelsIndex];
42 
43  const unsigned int outputHeight = outputShape[heightIndex];
44  const unsigned int outputWidth = outputShape[widthIndex];
45  const unsigned int outputDepth = outputShape[channelsIndex];
46 
47  const unsigned int paddingLeft = descriptor.m_PadLeft;
48  const unsigned int paddingTop = descriptor.m_PadTop;
49 
50  const unsigned int strideX = descriptor.m_StrideX;
51  const unsigned int strideY = descriptor.m_StrideY;
52 
53  std::vector<float> outputBuffer(outputShape.GetNumElements(), 0);
54 
55  const std::vector<float> inputVec = inputDecoder.DecodeTensor(inputShape);
56  const std::vector<float> filterVec = weightsDecoder.DecodeTensor(weightsShape);
57 
58  for (unsigned int batch = 0u; batch < numBatches; ++batch)
59  {
60  for (unsigned int yInput = 0u; yInput < inputHeight; ++yInput)
61  {
62  for (unsigned int xInput = 0u; xInput < inputWidth; ++xInput)
63  {
64  unsigned int xOutputOrigin = xInput * strideX - paddingLeft;
65  unsigned int yOutputOrigin = yInput * strideY - paddingTop;
66 
67  for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
68  {
69  for (unsigned int yWeights = 0u; yWeights < weightsHeight; ++yWeights)
70  {
71  for (unsigned int xWeights = 0u; xWeights < weightsWidth; ++xWeights)
72  {
73  unsigned int yOutput = yOutputOrigin + yWeights;
74  unsigned int xOutput = xOutputOrigin + xWeights;
75 
76  if (yOutput < outputHeight && xOutput< outputWidth)
77  {
78  for (unsigned int dInput = 0u; dInput < inputDepth; dInput++)
79  {
80  unsigned int inputIndex;
81  unsigned int outputIndex;
82  unsigned int weightsIndex;
83 
84  if(descriptor.m_DataLayout == armnn::DataLayout::NHWC)
85  {
86  inputIndex = batch * inputHeight * inputWidth * inputDepth +
87  yInput * inputWidth * inputDepth +
88  xInput * inputDepth +
89  dInput;
90 
91  weightsIndex = dOutput * weightsHeight * weightsWidth * weightsDepth +
92  yWeights * weightsWidth * weightsDepth +
93  xWeights * weightsDepth +
94  dInput;
95 
96  outputIndex = batch * outputHeight * outputWidth * outputDepth +
97  yOutput * outputWidth * outputDepth +
98  xOutput * outputDepth +
99  dOutput;
100  }
101  else
102  {
103  inputIndex = batch * inputDepth * inputHeight * inputWidth +
104  dInput * inputHeight * inputWidth +
105  yInput * inputWidth +
106  xInput;
107 
108  weightsIndex = dOutput * weightsDepth * weightsHeight * weightsWidth +
109  dInput * weightsHeight * weightsWidth +
110  yWeights * weightsWidth +
111  xWeights;
112 
113  outputIndex = batch * outputDepth * outputHeight * outputWidth +
114  dOutput * outputHeight * outputWidth +
115  yOutput * outputWidth +
116  xOutput;
117  }
118 
119  outputBuffer[outputIndex] += inputVec[inputIndex] * filterVec[weightsIndex];
120  }
121  }
122  }
123  }
124 
125  }
126  }
127  }
128  }
129 
130  // Apply bias (if enabled)
131  if (descriptor.m_BiasEnabled)
132  {
133  outputEncoder[0];
134  Decoder<float>& rBiasesDecoder = *biasesDecoder;
135 
136  for (unsigned int batch = 0u; batch < numBatches; ++batch)
137  {
138  for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
139  {
140  rBiasesDecoder[dOutput];
141  for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput)
142  {
143  for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput)
144  {
145  const unsigned int outputIndex =
146  dataLayoutIndexed.GetIndex(outputShape, batch, dOutput, yOutput, xOutput);
147  outputBuffer[outputIndex] += rBiasesDecoder.Get();
148  }
149  }
150  }
151  }
152  }
153  outputEncoder[0];
154  for (float output : outputBuffer)
155  {
156  outputEncoder.Set(output);
157  ++outputEncoder;
158  }
159 }
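To make the index mapping easier to follow, here is a minimal single-batch, single-channel sketch of the same scatter-style computation using plain float buffers instead of Decoder/Encoder. It uses signed coordinates rather than the unsigned wrap-around in the reference code above, and all sizes, strides and padding values are hypothetical:

#include <vector>

// Each input element is multiplied by the whole kernel and accumulated into the
// output window it maps to (the "scatter" view of transposed convolution).
std::vector<float> TransposeConv2dSketch(const std::vector<float>& input, unsigned int inH, unsigned int inW,
                                         const std::vector<float>& kernel, unsigned int kH, unsigned int kW,
                                         unsigned int outH, unsigned int outW,
                                         unsigned int strideY, unsigned int strideX,
                                         unsigned int padTop, unsigned int padLeft)
{
    std::vector<float> output(outH * outW, 0.0f);
    for (unsigned int yIn = 0; yIn < inH; ++yIn)
    {
        for (unsigned int xIn = 0; xIn < inW; ++xIn)
        {
            // Same origin computation as the reference implementation above.
            const int yOrigin = static_cast<int>(yIn * strideY) - static_cast<int>(padTop);
            const int xOrigin = static_cast<int>(xIn * strideX) - static_cast<int>(padLeft);
            for (unsigned int yK = 0; yK < kH; ++yK)
            {
                for (unsigned int xK = 0; xK < kW; ++xK)
                {
                    const int yOut = yOrigin + static_cast<int>(yK);
                    const int xOut = xOrigin + static_cast<int>(xK);
                    if (yOut >= 0 && xOut >= 0 &&
                        yOut < static_cast<int>(outH) && xOut < static_cast<int>(outW))
                    {
                        output[static_cast<unsigned int>(yOut) * outW + static_cast<unsigned int>(xOut)] +=
                            input[yIn * inW + xIn] * kernel[yK * kW + xK];
                    }
                }
            }
        }
    }
    return output;
}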

◆ TrueFunc()

bool armnn::TrueFunc ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 54 of file LayerSupportCommon.hpp.

References IgnoreUnused().

55 {
56  IgnoreUnused(reasonIfUnsupported);
57  IgnoreUnused(params...);
58  return true;
59 }

◆ VerifyClContextBuffer()

bool armnn::VerifyClContextBuffer ( flatbuffers::Verifier &  verifier)
inline

Definition at line 157 of file ClContextSchema_generated.h.

References ClContextIdentifier().

158  {
159  return verifier.VerifyBuffer<armnn::ClContext>(ClContextIdentifier());
160 }

◆ VerifySizePrefixedClContextBuffer()

bool armnn::VerifySizePrefixedClContextBuffer ( flatbuffers::Verifier &  verifier)
inline

Definition at line 162 of file ClContextSchema_generated.h.

References ClContextIdentifier().

163  {
164  return verifier.VerifySizePrefixedBuffer<armnn::ClContext>(ClContextIdentifier());
165 }
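A minimal sketch showing how these verifiers might be used before deserializing, assuming the buffer already holds a serialized ClContext and that ClContextSchema_generated.h is on the include path (the include path below is illustrative):

#include <cstdint>
#include <vector>
#include <flatbuffers/flatbuffers.h>
#include "ClContextSchema_generated.h" // illustrative include path

// Returns true if 'data' verifies as a ClContext flatbuffer (not size-prefixed).
bool IsValidClContext(const std::vector<uint8_t>& data)
{
    flatbuffers::Verifier verifier(data.data(), data.size());
    return armnn::VerifyClContextBuffer(verifier);
}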

◆ VerifyTensorInfoDataType()

void armnn::VerifyTensorInfoDataType ( const armnn::TensorInfo &  info,
armnn::DataType  dataType 
)
inline

Definition at line 337 of file TypesUtils.hpp.

References TensorInfo::GetDataType(), GetDataTypeName(), and TensorInfo::GetShape().

Referenced by ParserFlatbuffersFixture::CheckTensors(), ParserFlatbuffersSerializeFixture::RunTest(), and ParserFlatbuffersFixture::RunTest().

338 {
339  if (info.GetDataType() != dataType)
340  {
341  std::stringstream ss;
342  ss << "Unexpected datatype:" << armnn::GetDataTypeName(info.GetDataType())
343  << " for tensor:" << info.GetShape()
344  << ". The type expected to be: " << armnn::GetDataTypeName(dataType);
345  throw armnn::Exception(ss.str());
346  }
347 }
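A short usage sketch; the expected data type and the example shape below are arbitrary:

#include <armnn/Tensor.hpp>
#include <armnn/TypesUtils.hpp>

void CheckIsFloat32(const armnn::TensorInfo& info)
{
    // Throws armnn::Exception with a descriptive message if the type does not match.
    armnn::VerifyTensorInfoDataType(info, armnn::DataType::Float32);
}

// e.g. CheckIsFloat32(armnn::TensorInfo({1, 2, 2, 3}, armnn::DataType::Float32)); // passes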

◆ WrapClError()

RuntimeException armnn::WrapClError ( const cl::Error &  clError,
const CheckLocation &  location 
)
inline

Definition at line 147 of file ClWorkloadUtils.hpp.

References Exception::what().

Referenced by ClWorkloadFactory::AfterWorkloadsCreated(), and RunClFunction().

148 {
149  std::stringstream message;
150  message << "CL error: " << clError.what() << ". Error code: " << clError.err();
151 
152  return RuntimeException(message.str(), location);
153 }
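A hedged sketch of the wrapping pattern used by RunClFunction (referenced above), assuming the CHECK_LOCATION() macro from armnn/Exceptions.hpp and an illustrative include for the header that declares WrapClError:

#include <armnn/Exceptions.hpp>
#include "ClWorkloadUtils.hpp" // illustrative include; declares armnn::WrapClError

template <typename Func>
void RunAndWrap(Func&& clCall)
{
    try
    {
        clCall();
    }
    catch (cl::Error& error)
    {
        // Convert the OpenCL exception into an armnn::RuntimeException carrying the call site.
        throw armnn::WrapClError(error, CHECK_LOCATION());
    }
}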

Variable Documentation

◆ cpuAccCapabilities

const BackendCapabilities cpuAccCapabilities("CpuAcc", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })

◆ cpuRefCapabilities

const BackendCapabilities cpuRefCapabilities("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })

◆ EXPIRE_RATE

constexpr unsigned int EXPIRE_RATE = 3U

Variable to control the expire rate of the priority queue.

Definition at line 37 of file Types.hpp.

Referenced by Threadpool::TerminateThreadPool().

◆ g_AggregateProfilingEventsByInference

constexpr bool g_AggregateProfilingEventsByInference = true

Definition at line 37 of file Profiling.cpp.

◆ g_ProfilingEventCountHint

constexpr std::size_t g_ProfilingEventCountHint = 1024

Definition at line 29 of file Profiling.cpp.

◆ g_WriteProfilingEventSequence

constexpr bool g_WriteProfilingEventSequence = true

Definition at line 32 of file Profiling.cpp.

◆ g_WriteReportToStdOutOnProfilerDestruction

constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false

Definition at line 41 of file Profiling.cpp.

◆ gpuAccCapabilities

const BackendCapabilities gpuAccCapabilities("GpuAcc", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", true}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })

◆ LOWEST_CAPTURE_PERIOD

constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u

The lowest performance data capture interval we support is 10 milliseconds.

Definition at line 34 of file Types.hpp.

Referenced by TEST_SUITE().

◆ MaxNumOfTensorDimensions

◆ oldCpuRefCapabilities

const std::set<armnn::BackendCapability> oldCpuRefCapabilities
Initial value:
{
 armnn::BackendCapability::NonConstWeights
}

Definition at line 24 of file RefBackend.hpp.

◆ paddingRequiredLayers

const std::set<armnn::LayerType> paddingRequiredLayers
Initial value:
{
LayerType::Convolution2d,
LayerType::DepthwiseConvolution2d,
LayerType::Lstm,
LayerType::Mean,
LayerType::QuantizedLstm,
LayerType::TransposeConvolution2d
}

Definition at line 16 of file NeonTensorHandleFactory.hpp.

Referenced by NeonTensorHandleFactory::GetCapabilities().

◆ tl_Profiler

thread_local IProfiler* tl_Profiler = nullptr

Definition at line 570 of file Profiling.cpp.

Referenced by ProfilerManager::GetProfiler().

◆ wordSize

constexpr size_t wordSize = sizeof(size_t) * 8