ArmNN
 22.05.01
armnn Namespace Reference

Copyright (c) 2021 ARM Limited and Contributors. More...

Namespaces

 experimental
 
 gatordmock
 
 optimizations
 
 profiling
 
 stringUtils
 
 test
 
 timelinedecoder
 
 utility
 

Classes

struct  abs
 
class  AbsLayer
 
struct  AbsQueueDescriptor
 
struct  ActivationDescriptor
 An ActivationDescriptor for the ActivationLayer. More...
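For illustration, a bounded ReLU activation could be configured roughly as follows (a minimal sketch; the member names m_Function, m_A and m_B follow the public ArmNN headers, and the network pointer is assumed to exist):

    armnn::ActivationDescriptor activationDesc;
    activationDesc.m_Function = armnn::ActivationFunction::BoundedReLu;
    activationDesc.m_A = 6.0f; // upper bound used by BoundedReLu
    activationDesc.m_B = 0.0f; // lower bound used by BoundedReLu
    armnn::IConnectableLayer* activation = network->AddActivationLayer(activationDesc, "relu6");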
 
class  ActivationLayer
 This layer represents an activation operation with the specified activation function. More...
 
struct  ActivationQueueDescriptor
 
class  AddedLayerObservable
 
class  AdditionLayer
 This layer represents an addition operation. More...
 
struct  AdditionQueueDescriptor
 
struct  Allocator
 
struct  ArgMinMaxDescriptor
 An ArgMinMaxDescriptor for ArgMinMaxLayer. More...
 
class  ArgMinMaxLayer
 This layer represents an ArgMinMax operation. More...
 
struct  ArgMinMaxQueueDescriptor
 
class  ArmNNProfilingServiceInitialiser
 
class  BackendCapabilityException
 
class  BackendId
 
struct  BackendOptions
 Struct for the users to pass backend specific options. More...
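As a rough sketch, backend-specific options can be grouped per backend and passed on via ModelOptions or NetworkOptions (the option names shown are illustrative and depend on the backend):

    armnn::BackendOptions gpuAccOptions("GpuAcc",
    {
        { "FastMathEnabled", true }, // illustrative option name
        { "TuningLevel", 2 }         // illustrative option name
    });
    armnn::ModelOptions modelOptions = { gpuAccOptions };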
 
class  BackendRegistry
 
struct  BackendSettings
 
class  BackendUnavailableException
 Class for non-fatal exceptions raised while initialising a backend. More...
 
struct  BackendVersion
 
class  BadOptionalAccessException
 
struct  BaseDescriptor
 Base class for all descriptors. More...
 
class  BaseIterator
 
class  BaseMemoryManager
 
class  BaseTensor
 
class  BaseWorkload
 
struct  BatchNormalizationDescriptor
 A BatchNormalizationDescriptor for the BatchNormalizationLayer. More...
 
class  BatchNormalizationLayer
 This layer represents a batch normalization operation. More...
 
struct  BatchNormalizationQueueDescriptor
 
struct  BatchToSpaceNdDescriptor
 A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer. More...
 
class  BatchToSpaceNdLayer
 This layer represents a BatchToSpaceNd operation. More...
 
struct  BatchToSpaceNdQueueDescriptor
 
class  BFloat16
 
class  BFloat16Decoder
 
class  BFloat16Encoder
 
struct  BiasAndWeightsTypesCompatible
 
struct  BiasAndWeightsTypesMatch
 
class  BindableLayer
 
class  BooleanDecoder
 
class  BooleanDecoderBool
 
class  BooleanEncoder
 
struct  BroadcastLoop
 
struct  BufferStorage
 
struct  Capability
 Capability of the TensorHandleFactory. More...
 
class  CastLayer
 This layer represents a cast operation. More...
 
struct  CastQueueDescriptor
 
struct  ChannelShuffleDescriptor
 A ChannelShuffleDescriptor for the ChannelShuffle operator. More...
 
class  ChannelShuffleLayer
 
struct  ChannelShuffleQueueDescriptor
 
struct  CheckLocation
 
class  ClAbsWorkload
 
class  ClActivationWorkload
 
class  ClAdditionWorkload
 
class  ClArgMinMaxWorkload
 
class  ClBackend
 
class  ClBackendContext
 
class  ClBackendDefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  ClBackendModelContext
 The ClBackendModelContext is used to pass in CL specific backend ModelOptions. More...
 
class  ClBaseWorkload
 
class  ClBatchNormalizationFloatWorkload
 
class  ClBatchToSpaceNdWorkload
 
class  ClCastWorkload
 
class  ClChannelShuffleWorkload
 
class  ClComparisonWorkload
 
class  ClConcatWorkload
 
class  ClConstantWorkload
 
struct  ClContextBuilder
 
class  ClContextControl
 
class  ClContextDeserializer
 
class  ClContextSerializer
 
class  ClConvertFp16ToFp32Workload
 
class  ClConvertFp32ToFp16Workload
 
class  ClConvolution2dWorkload
 
class  ClConvolution3dWorkload
 
class  ClDepthToSpaceWorkload
 
class  ClDepthwiseConvolutionWorkload
 
class  ClDequantizeWorkload
 
class  ClDivisionWorkload
 
class  ClExpWorkload
 
class  ClFillWorkload
 
class  ClFloorFloatWorkload
 
class  ClFullyConnectedWorkload
 
class  ClGatherNdWorkload
 
class  ClGatherWorkload
 
class  ClImportSubTensorHandle
 
class  ClImportTensorHandle
 
class  ClImportTensorHandleFactory
 This factory creates ClImportTensorHandles that refer to imported memory tensors. More...
 
class  ClInstanceNormalizationWorkload
 
class  ClL2NormalizationFloatWorkload
 
class  ClLayerSupport
 
class  ClLogicalAndWorkload
 
class  ClLogicalNotWorkload
 
class  ClLogicalOrWorkload
 
class  ClLogSoftmaxWorkload
 
class  ClLogWorkload
 
class  ClLstmFloatWorkload
 
class  ClMaximumWorkload
 
class  ClMeanWorkload
 
class  ClMemoryManager
 
class  ClMinimumWorkload
 
class  ClMultiplicationWorkload
 
class  ClNegWorkload
 
class  ClNormalizationFloatWorkload
 
class  ClPadWorkload
 
class  ClPermuteWorkload
 
class  ClPooling2dWorkload
 
class  ClPooling3dWorkload
 
class  ClPreluWorkload
 
class  ClQLstmWorkload
 
class  ClQuantizedLstmWorkload
 
class  ClQuantizeWorkload
 
struct  ClRankWorkload
 
class  ClReduceWorkload
 
class  ClReshapeWorkload
 
class  ClResizeWorkload
 
class  ClRsqrtWorkload
 
class  ClRuntimeUnavailableException
 
class  ClSinWorkload
 
class  ClSliceWorkload
 
class  ClSoftmaxWorkload
 
class  ClSpaceToBatchNdWorkload
 
class  ClSpaceToDepthWorkload
 
class  ClSplitterWorkload
 
class  ClSqrtWorkload
 
class  ClStackWorkload
 
class  ClStridedSliceWorkload
 
class  ClSubTensorHandle
 
class  ClSubtractionWorkload
 
class  ClTensorHandle
 
class  ClTensorHandleFactory
 
class  ClTransposeConvolution2dWorkload
 
class  ClTransposeWorkload
 
class  ClTunedParameters
 
class  ClUnidirectionalSequenceLstmFloatWorkload
 
class  ClWorkloadFactory
 
struct  ComparisonDescriptor
 A ComparisonDescriptor for the ComparisonLayer. More...
 
class  ComparisonLayer
 This layer represents a comparison operation. More...
 
struct  ComparisonQueueDescriptor
 
class  ConcatLayer
 This layer represents a concatenation (merge) operation. More...
 
struct  ConcatQueueDescriptor
 
class  ConstantLayer
 A layer that constant data can be bound to. More...
 
class  ConstantMemoryStrategy
 
struct  ConstantQueueDescriptor
 
class  ConstPassthroughTensorHandle
 
struct  ConstructInPlace
 Disambiguation tag that can be passed to the constructor to indicate that the contained object should be constructed in-place. More...
 
class  ConstTensor
 A tensor defined by a TensorInfo (shape and data type) and an immutable backing store. More...
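A minimal sketch of wrapping existing data in a ConstTensor; note that recent releases expect the TensorInfo to be flagged as constant first:

    std::vector<float> weightsData = { 1.0f, 2.0f, 3.0f, 4.0f };
    armnn::TensorInfo weightsInfo({ 2, 2 }, armnn::DataType::Float32);
    weightsInfo.SetConstant(true);
    armnn::ConstTensor weights(weightsInfo, weightsData);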
 
class  ConstTensorHandle
 
class  ConvertBf16ToFp32Layer
 This layer converts data type BFloat16 to Float32. More...
 
struct  ConvertBf16ToFp32QueueDescriptor
 
class  ConvertFp16ToFp32Layer
 This layer converts data type Float16 to Float32. More...
 
struct  ConvertFp16ToFp32QueueDescriptor
 
class  ConvertFp32ToBf16Layer
 This layer converts data type Float32 to BFloat16. More...
 
struct  ConvertFp32ToBf16QueueDescriptor
 
class  ConvertFp32ToFp16Layer
 This layer converts data type Float32 to Float16. More...
 
struct  ConvertFp32ToFp16QueueDescriptor
 
struct  Convolution2dDescriptor
 A Convolution2dDescriptor for the Convolution2dLayer. More...
 
class  Convolution2dLayer
 This layer represents a convolution 2d operation. More...
 
struct  Convolution2dQueueDescriptor
 
struct  Convolution3dDescriptor
 A Convolution3dDescriptor for the Convolution3dLayer. More...
 
class  Convolution3dLayer
 This layer represents a convolution 3d operation. More...
 
struct  Convolution3dQueueDescriptor
 
class  CopyMemGenericWorkload
 
class  DebugLayer
 This layer visualizes the data flowing through the network. More...
 
struct  DebugQueueDescriptor
 
class  Decoder
 
class  DefaultAllocator
 Default Memory Allocator class returned from IBackendInternal::GetDefaultAllocator(MemorySource) More...
 
class  DepthToSpaceLayer
 This layer represents a DepthToSpace operation. More...
 
struct  DepthToSpaceQueueDescriptor
 
struct  DepthwiseConvolution2dDescriptor
 A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer. More...
 
class  DepthwiseConvolution2dLayer
 This layer represents a depthwise convolution 2d operation. More...
 
struct  DepthwiseConvolution2dQueueDescriptor
 Depthwise Convolution 2D layer workload data. More...
 
class  DequantizeLayer
 This layer dequantizes the input tensor. More...
 
struct  DequantizeQueueDescriptor
 
struct  DetectionPostProcessDescriptor
 
class  DetectionPostProcessLayer
 This layer represents a detection postprocess operator. More...
 
struct  DetectionPostProcessQueueDescriptor
 
class  DeviceSpec
 
class  DivisionLayer
 This layer represents a division operation. More...
 
struct  DivisionQueueDescriptor
 
class  DotAttributeSet
 
class  DotBase
 
class  DotDefaults
 
class  DotEdge
 
class  DotGraph
 
class  DotNode
 
class  DynamicBackend
 
class  DynamicBackendUtils
 
class  ElementwiseBaseLayer
 NOTE: This is an abstract class that encapsulates the elementwise operations; it does not implement: std::unique_ptr<IWorkload> Layer::CreateWorkload(const IWorkloadFactory& factory) const = 0; Layer* Clone(Graph& graph) const = 0. More...
 
struct  ElementwiseBinaryFunction
 
struct  ElementwiseUnaryDescriptor
 An ElementwiseUnaryDescriptor for the ElementwiseUnaryLayer. More...
 
struct  ElementwiseUnaryFunction
 
class  ElementwiseUnaryLayer
 This layer represents an elementwise unary operation. More...
 
struct  ElementwiseUnaryQueueDescriptor
 
struct  EmptyOptional
 EmptyOptional is used to initialize the Optional class in case we want to have default value for an Optional in a function declaration. More...
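A brief sketch of the idiom (the Configure function below is hypothetical, for illustration only):

    armnn::Optional<float> scale;              // constructed empty
    armnn::Optional<float> definedScale(0.5f); // holds a value
    // Hypothetical declaration using EmptyOptional as the default argument:
    void Configure(const armnn::Optional<float>& scale = armnn::EmptyOptional());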
 
class  Encoder
 
struct  EqualQueueDescriptor
 
class  ErasedLayerNamesObservable
 
class  Event
 Event class records measurements reported by BeginEvent()/EndEvent() and returns measurements when Event::GetMeasurements() is called. More...
 
class  Exception
 Base class for all ArmNN exceptions so that users can filter to just those. More...
 
class  ExecutionFrame
 
struct  exp
 
struct  FakeQuantizationDescriptor
 A FakeQuantizationDescriptor for the FakeQuantizationLayer. More...
 
class  FakeQuantizationLayer
 This layer represents a fake quantization operation. More...
 
struct  FakeQuantizationQueueDescriptor
 
class  FileNotFoundException
 
struct  FillDescriptor
 A FillDescriptor for the FillLayer. More...
 
class  FillLayer
 This layer represents a fill operation. More...
 
struct  FillQueueDescriptor
 
class  FirstInputTypedWorkload
 
struct  FLATBUFFERS_FINAL_CLASS
 
class  Float16Decoder
 
class  Float16Encoder
 
class  Float32Decoder
 
class  Float32Encoder
 
class  FloorLayer
 This layer represents a floor operation. More...
 
struct  FloorQueueDescriptor
 
struct  FullyConnectedDescriptor
 A FullyConnectedDescriptor for the FullyConnectedLayer. More...
 
class  FullyConnectedLayer
 This layer represents a fully connected operation. More...
 
struct  FullyConnectedQueueDescriptor
 
struct  GatherDescriptor
 A GatherDescriptor for the GatherLayer. More...
 
class  GatherLayer
 This layer represents a Gather operator. More...
 
class  GatherNdLayer
 This layer represents a GatherNd operator. More...
 
struct  GatherNdQueueDescriptor
 
struct  GatherQueueDescriptor
 
class  Graph
 
class  GraphObservable
 
class  GraphValidationException
 
struct  GreaterQueueDescriptor
 
class  HtmlBold
 
class  HtmlFont
 
class  HtmlSection
 
class  HtmlSimpleTag
 
class  IAclTensorHandle
 
class  IBackend
 Each backend should implement an IBackend. More...
 
class  IBackendContext
 
class  IBackendInternal
 
class  IBackendModelContext
 
class  IClTensorHandle
 
class  ICLTensorProxy
 
class  IConnectableLayer
 Interface for a layer that is connectable to other layers via InputSlots and OutputSlots. More...
 
class  ICustomAllocator
 Custom Allocator interface. More...
 
class  IDeviceSpec
 Device specific knowledge to be passed to the optimizer. More...
 
class  IExecutionFrame
 ExecutionFrame interface to enqueue a workload computation. More...
 
class  IGpuAccTunedParameters
 Manages a set of GpuAcc parameters which have been tuned for maximum performance. More...
 
class  IGraphObservable
 
class  IInputSlot
 An input connection slot for a layer. More...
 
class  ILayerSupport
 
class  IMemoryManager
 
class  IMemoryOptimizerStrategy
 
struct  IMemoryOptimizerStrategyFactory
 
class  ImportMemGenericWorkload
 
class  INetwork
 Main network class which provides the interface for building up a neural network. More...
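A minimal sketch of building a small network through this interface (assuming the usual public headers, e.g. armnn/ArmNN.hpp, are included):

    armnn::INetworkPtr network = armnn::INetwork::Create();
    armnn::IConnectableLayer* input   = network->AddInputLayer(0);
    armnn::IConnectableLayer* softmax = network->AddSoftmaxLayer(armnn::SoftmaxDescriptor(), "softmax");
    armnn::IConnectableLayer* output  = network->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
    softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    armnn::TensorInfo tensorInfo({ 1, 10 }, armnn::DataType::Float32);
    input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    softmax->GetOutputSlot(0).SetTensorInfo(tensorInfo);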
 
struct  INetworkProperties
 
class  InputLayer
 A layer user-provided data can be bound to (e.g. inputs, outputs). More...
 
class  InputSlot
 
struct  InstanceNormalizationDescriptor
 An InstanceNormalizationDescriptor for InstanceNormalizationLayer. More...
 
class  InstanceNormalizationLayer
 This layer represents an instance normalization operation. More...
 
struct  InstanceNormalizationQueueDescriptor
 
class  Instrument
 
class  Int32Decoder
 
class  Int32Encoder
 
class  Int32ToInt32tDecoder
 
class  Int32ToInt32tEncoder
 
class  InvalidArgumentException
 
class  IOptimizedNetwork
 
class  IOutputSlot
 An output connection slot for a layer. More...
 
class  IProfiler
 
class  IRuntime
 
struct  IsHalfType
 
struct  IsMemorySource
 
struct  IsMemorySource< MemorySource >
 
class  IStrategy
 
class  ISubgraphViewConverter
 
class  ITensorHandle
 
class  ITensorHandleFactory
 
class  IWorkload
 Workload interface to enqueue a layer computation. More...
 
class  IWorkloadFactory
 
struct  JsonChildObject
 
class  JsonPrinter
 
class  JsonUtils
 
struct  L2NormalizationDescriptor
 An L2NormalizationDescriptor for the L2NormalizationLayer. More...
 
class  L2NormalizationLayer
 This layer represents an L2 normalization operation. More...
 
struct  L2NormalizationQueueDescriptor
 
class  Layer
 
class  LayerSupportBase
 
class  LayerSupportHandle
 
struct  LayerTypeOfImpl
 
struct  LayerTypeOfImpl< LayerType::Activation >
 
struct  LayerTypeOfImpl< LayerType::Addition >
 
struct  LayerTypeOfImpl< LayerType::ArgMinMax >
 
struct  LayerTypeOfImpl< LayerType::BatchNormalization >
 
struct  LayerTypeOfImpl< LayerType::BatchToSpaceNd >
 
struct  LayerTypeOfImpl< LayerType::Cast >
 
struct  LayerTypeOfImpl< LayerType::ChannelShuffle >
 
struct  LayerTypeOfImpl< LayerType::Comparison >
 
struct  LayerTypeOfImpl< LayerType::Concat >
 
struct  LayerTypeOfImpl< LayerType::Constant >
 
struct  LayerTypeOfImpl< LayerType::ConvertBf16ToFp32 >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp16ToFp32 >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp32ToBf16 >
 
struct  LayerTypeOfImpl< LayerType::ConvertFp32ToFp16 >
 
struct  LayerTypeOfImpl< LayerType::Convolution2d >
 
struct  LayerTypeOfImpl< LayerType::Convolution3d >
 
struct  LayerTypeOfImpl< LayerType::Debug >
 
struct  LayerTypeOfImpl< LayerType::DepthToSpace >
 
struct  LayerTypeOfImpl< LayerType::DepthwiseConvolution2d >
 
struct  LayerTypeOfImpl< LayerType::Dequantize >
 
struct  LayerTypeOfImpl< LayerType::DetectionPostProcess >
 
struct  LayerTypeOfImpl< LayerType::Division >
 
struct  LayerTypeOfImpl< LayerType::ElementwiseUnary >
 
struct  LayerTypeOfImpl< LayerType::FakeQuantization >
 
struct  LayerTypeOfImpl< LayerType::Fill >
 
struct  LayerTypeOfImpl< LayerType::Floor >
 
struct  LayerTypeOfImpl< LayerType::FullyConnected >
 
struct  LayerTypeOfImpl< LayerType::Gather >
 
struct  LayerTypeOfImpl< LayerType::GatherNd >
 
struct  LayerTypeOfImpl< LayerType::Input >
 
struct  LayerTypeOfImpl< LayerType::InstanceNormalization >
 
struct  LayerTypeOfImpl< LayerType::L2Normalization >
 
struct  LayerTypeOfImpl< LayerType::LogicalBinary >
 
struct  LayerTypeOfImpl< LayerType::LogSoftmax >
 
struct  LayerTypeOfImpl< LayerType::Lstm >
 
struct  LayerTypeOfImpl< LayerType::Map >
 
struct  LayerTypeOfImpl< LayerType::Maximum >
 
struct  LayerTypeOfImpl< LayerType::Mean >
 
struct  LayerTypeOfImpl< LayerType::MemCopy >
 
struct  LayerTypeOfImpl< LayerType::MemImport >
 
struct  LayerTypeOfImpl< LayerType::Merge >
 
struct  LayerTypeOfImpl< LayerType::Minimum >
 
struct  LayerTypeOfImpl< LayerType::Multiplication >
 
struct  LayerTypeOfImpl< LayerType::Normalization >
 
struct  LayerTypeOfImpl< LayerType::Output >
 
struct  LayerTypeOfImpl< LayerType::Pad >
 
struct  LayerTypeOfImpl< LayerType::Permute >
 
struct  LayerTypeOfImpl< LayerType::Pooling2d >
 
struct  LayerTypeOfImpl< LayerType::Pooling3d >
 
struct  LayerTypeOfImpl< LayerType::PreCompiled >
 
struct  LayerTypeOfImpl< LayerType::Prelu >
 
struct  LayerTypeOfImpl< LayerType::QLstm >
 
struct  LayerTypeOfImpl< LayerType::Quantize >
 
struct  LayerTypeOfImpl< LayerType::QuantizedLstm >
 
struct  LayerTypeOfImpl< LayerType::Rank >
 
struct  LayerTypeOfImpl< LayerType::Reduce >
 
struct  LayerTypeOfImpl< LayerType::Reshape >
 
struct  LayerTypeOfImpl< LayerType::Resize >
 
struct  LayerTypeOfImpl< LayerType::Shape >
 
struct  LayerTypeOfImpl< LayerType::Slice >
 
struct  LayerTypeOfImpl< LayerType::Softmax >
 
struct  LayerTypeOfImpl< LayerType::SpaceToBatchNd >
 
struct  LayerTypeOfImpl< LayerType::SpaceToDepth >
 
struct  LayerTypeOfImpl< LayerType::Splitter >
 
struct  LayerTypeOfImpl< LayerType::Stack >
 
struct  LayerTypeOfImpl< LayerType::StandIn >
 
struct  LayerTypeOfImpl< LayerType::StridedSlice >
 
struct  LayerTypeOfImpl< LayerType::Subtraction >
 
struct  LayerTypeOfImpl< LayerType::Switch >
 
struct  LayerTypeOfImpl< LayerType::Transpose >
 
struct  LayerTypeOfImpl< LayerType::TransposeConvolution2d >
 
struct  LayerTypeOfImpl< LayerType::UnidirectionalSequenceLstm >
 
struct  LayerTypeOfImpl< LayerType::Unmap >
 
class  LayerValidationException
 
class  LayerVisitorBase
 Visitor base class with empty implementations. More...
 
class  LayerWithParameters
 
class  LoadedNetwork
 
struct  log
 
struct  LogicalBinaryDescriptor
 A LogicalBinaryDescriptor for the LogicalBinaryLayer. More...
 
struct  LogicalBinaryFunction
 
class  LogicalBinaryLayer
 This layer represents a Logical Binary operation. More...
 
struct  LogicalBinaryQueueDescriptor
 
struct  LogicalUnaryFunction
 
class  LogSink
 
class  LogSoftmaxLayer
 This layer represents a log softmax operation. More...
 
struct  LogSoftmaxQueueDescriptor
 
struct  LstmBasicParameters
 
struct  LstmDescriptor
 An LstmDescriptor for the LstmLayer. More...
 
struct  LstmInputParams
 
struct  LstmInputParamsInfo
 
class  LstmLayer
 This layer represents an LSTM operation. More...
 
struct  LstmOptCifgParameters
 
struct  LstmOptLayerNormParameters
 
struct  LstmOptPeepholeParameters
 
struct  LstmOptProjectionParameters
 
struct  LstmQueueDescriptor
 
class  LstmVisitor
 
class  ManagedConstTensorHandle
 
class  MapLayer
 This layer represents a memory map operation. More...
 
struct  MapQueueDescriptor
 
class  MapWorkload
 
struct  maximum
 
class  MaximumLayer
 This layer represents a maximum operation. More...
 
struct  MaximumQueueDescriptor
 
struct  MeanDescriptor
 A MeanDescriptor for the MeanLayer. More...
 
class  MeanLayer
 This layer represents a mean operation. More...
 
struct  MeanQueueDescriptor
 
struct  Measurement
 
struct  MemBin
 
struct  MemBlock
 
class  MemCopyLayer
 This layer represents a memory copy operation. More...
 
struct  MemCopyQueueDescriptor
 
class  MemImportLayer
 This layer represents a memory import operation. More...
 
struct  MemImportQueueDescriptor
 
class  MemoryExportException
 
class  MemoryImportException
 
class  MemoryManager
 
class  MemoryValidationException
 
struct  MemSyncQueueDescriptor
 
class  MergeLayer
 This layer represents a merge operation. More...
 
struct  MergeQueueDescriptor
 
struct  minimum
 
class  MinimumLayer
 This layer represents a minimum operation. More...
 
struct  MinimumQueueDescriptor
 
class  MockBackend
 
class  MockBackendInitialiser
 
class  MockBackendProfilingContext
 
class  MockBackendProfilingService
 
class  MockImportBackend
 
class  MockImportBackendInitialiser
 
class  MockImportLayerSupport
 
class  MockLayerSupport
 
class  MockMemoryManager
 
class  MockTensorHandle
 
class  MockTensorHandleFactory
 
class  MockWorkloadFactory
 
class  MultiplicationLayer
 This layer represents a multiplication operation. More...
 
struct  MultiplicationQueueDescriptor
 
class  MultiTypedWorkload
 
class  NeonAbsWorkload
 
class  NeonActivationWorkload
 
class  NeonAdditionWorkload
 
class  NeonArgMinMaxWorkload
 
class  NeonBackend
 
class  NeonBackendModelContext
 The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions. More...
 
class  NeonBaseWorkload
 
class  NeonBatchNormalizationWorkload
 
class  NeonBatchToSpaceNdWorkload
 
class  NeonCastWorkload
 
class  NeonChannelShuffleWorkload
 
class  NeonComparisonWorkload
 
class  NeonConcatWorkload
 
class  NeonConstantWorkload
 
class  NeonConvertBf16ToFp32Workload
 
class  NeonConvertFp16ToFp32Workload
 
class  NeonConvertFp32ToBf16Workload
 
class  NeonConvertFp32ToFp16Workload
 
class  NeonConvolution2dWorkload
 
class  NeonConvolution3dWorkload
 
class  NeonDepthToSpaceWorkload
 
class  NeonDepthwiseConvolutionWorkload
 
class  NeonDequantizeWorkload
 
class  NeonDetectionPostProcessWorkload
 
class  NeonDivisionWorkload
 
class  NeonExpWorkload
 
class  NeonFillWorkload
 
class  NeonFloorFloatWorkload
 
class  NeonFullyConnectedWorkload
 
class  NeonGatherNdWorkload
 
class  NeonGatherWorkload
 
class  NeonInstanceNormalizationWorkload
 
class  NeonInterceptorScheduler
 
class  NeonL2NormalizationFloatWorkload
 
class  NeonLayerSupport
 
class  NeonLogicalAndWorkload
 
class  NeonLogicalNotWorkload
 
class  NeonLogicalOrWorkload
 
class  NeonLogSoftmaxWorkload
 
class  NeonLogWorkload
 
class  NeonLstmFloatWorkload
 
class  NeonMaximumWorkload
 
class  NeonMeanWorkload
 
class  NeonMemoryManager
 
class  NeonMinimumWorkload
 
class  NeonMultiplicationWorkload
 
class  NeonNegWorkload
 
class  NeonNormalizationFloatWorkload
 
class  NeonPadWorkload
 
class  NeonPermuteWorkload
 
class  NeonPooling2dWorkload
 
class  NeonPooling3dWorkload
 
class  NeonPreluWorkload
 
class  NeonQLstmWorkload
 
class  NeonQuantizedLstmWorkload
 
class  NeonQuantizeWorkload
 
struct  NeonRankWorkload
 
class  NeonReduceWorkload
 
class  NeonReshapeWorkload
 
class  NeonResizeWorkload
 
class  NeonRsqrtWorkload
 
class  NeonSinWorkload
 
class  NeonSliceWorkload
 
class  NeonSoftmaxWorkload
 
class  NeonSpaceToBatchNdWorkload
 
class  NeonSpaceToDepthWorkload
 
class  NeonSplitterWorkload
 
class  NeonSqrtWorkload
 
class  NeonStackWorkload
 
class  NeonStridedSliceWorkload
 
class  NeonSubTensorHandle
 
class  NeonSubtractionWorkload
 
class  NeonTensorHandle
 
class  NeonTensorHandleFactory
 
class  NeonTimer
 
class  NeonTransposeConvolution2dWorkload
 
class  NeonTransposeWorkload
 
class  NeonUnidirectionalSequenceLstmFloatWorkload
 
class  NeonUnidirectionalSequenceLstmWorkload
 
class  NeonWorkloadFactory
 
class  NetworkImpl
 Private implementation of INetwork. More...
 
class  NodeContent
 
struct  NormalizationDescriptor
 A NormalizationDescriptor for the NormalizationLayer. More...
 
class  NormalizationLayer
 This layer represents a normalization operation. More...
 
struct  NormalizationQueueDescriptor
 
struct  NoThrowStrategy
 
struct  NullDescriptor
 Null Descriptor used as a return value from the IConnectableLayer GetParameters method by layers which do not have a descriptor. More...
 
class  NullPointerException
 
class  NullWorkload
 
class  OpenClTimer
 OpenClTimer instrument that times all OpenCl kernels executed between calls to Start() and Stop(). More...
 
class  Optimization
 
struct  OptimizationResult
 
class  OptimizationViews
 
class  OptimizedNetworkImpl
 
class  OptimizeForConnection
 
class  OptimizeForConnectionImpl
 Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...
 
class  OptimizeForExclusiveConnection
 
class  OptimizeForExclusiveConnectionImpl
 Wrapper Optimization class that calls Wrapped::Run for every connection BaseType -> ChildType. More...
 
class  OptimizeForType
 
class  OptimizeForTypeImpl
 Wrapper Optimization base class that calls Wrapped::Run() for every layer of type BaseType. More...
 
class  OptimizeForTypeImpl< Layer, Wrapped >
 Specialization that calls Wrapped::Run() for any layer type. More...
 
class  Optimizer
 
struct  OptimizerOptions
 ArmNN performs an optimization on each model/network before it gets loaded for execution. More...
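For example, FP16 reduction and full shape inference might be requested roughly as follows (the member names are assumptions based on the 22.05 headers):

    armnn::OptimizerOptions optimizerOptions;
    optimizerOptions.m_ReduceFp32ToFp16 = true;  // assumed member name
    optimizerOptions.m_Debug = false;            // assumed member name
    optimizerOptions.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::InferAndValidate; // assumed member name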
 
class  Optional
 
class  OptionalBase
 OptionalBase is the common functionality between reference and non-reference optional types. More...
 
class  OptionalReferenceSwitch
 The default implementation is the non-reference case. More...
 
class  OptionalReferenceSwitch< true, T >
 This is the special case for reference types. More...
 
struct  OriginsDescriptor
 An OriginsDescriptor for the ConcatLayer. More...
 
class  OutputHandler
 
class  OutputLayer
 A layer user-provided data can be bound to (e.g. inputs, outputs). More...
 
class  OutputSlot
 
struct  PadDescriptor
 A PadDescriptor for the PadLayer. More...
 
class  PadLayer
 This layer represents a pad operation. More...
 
struct  PadQueueDescriptor
 
class  ParseException
 
class  PassthroughTensorHandle
 
class  PerAxisIterator
 PerAxisIterator for per-axis quantization. More...
 
class  PermutationVector
 
struct  PermuteDescriptor
 A PermuteDescriptor for the PermuteLayer. More...
 
class  PermuteLayer
 This layer represents a permutation operation. More...
 
struct  PermuteQueueDescriptor
 
class  PolymorphicDowncastException
 
struct  Pooling2dDescriptor
 A Pooling2dDescriptor for the Pooling2dLayer. More...
 
class  Pooling2dLayer
 This layer represents a pooling 2d operation. More...
 
struct  Pooling2dQueueDescriptor
 
struct  Pooling3dDescriptor
 A Pooling3dDescriptor for the Pooling3dLayer. More...
 
class  Pooling3dLayer
 This layer represents a pooling 3d operation. More...
 
struct  Pooling3dQueueDescriptor
 
struct  PreCompiledDescriptor
 A PreCompiledDescriptor for the PreCompiledLayer. More...
 
class  PreCompiledLayer
 
struct  PreCompiledQueueDescriptor
 
class  PredicateResult
 
class  PreluLayer
 
struct  PreluQueueDescriptor
 
class  ProfilerImpl
 
class  ProfilerManager
 
class  ProfilingDetails
 ProfilingDetails class records any details associated with the operator and passes them on for output to the user. More...
 
struct  ProgramBuilder
 
class  QASymm8Decoder
 
class  QASymm8Encoder
 
class  QASymmS8Decoder
 
class  QASymmS8Encoder
 
struct  QLstmBasicParameters
 
struct  QLstmDescriptor
 A QLstmDescriptor for the QLstmLayer. More...
 
class  QLstmLayer
 This layer represents a QLstm operation. More...
 
struct  QLstmOptCifgParameters
 
struct  QLstmOptLayerNormParameters
 
struct  QLstmOptPeepholeParameters
 
struct  QLstmOptProjectionParameters
 
struct  QLstmQueueDescriptor
 
class  QSymm16Decoder
 
class  QSymm16Encoder
 
class  QSymm8PerAxisDecoder
 
class  QSymm8PerAxisEncoder
 
class  QSymmS8Decoder
 
class  QSymmS8Encoder
 
struct  QuantizationParametersAreEqual
 
struct  QuantizedLstmInputParams
 
struct  QuantizedLstmInputParamsInfo
 
class  QuantizedLstmLayer
 This layer represents a QuantizedLstm operation. More...
 
struct  QuantizedLstmParameters
 
struct  QuantizedLstmQueueDescriptor
 
struct  QuantizedMultiplierSmallerThanOne
 Performs multiplication of an integer with a multiplier which is less than one, using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. More...
 
class  QuantizeLayer
 
struct  QuantizeQueueDescriptor
 
struct  QueueDescriptor
 
struct  QueueDescriptorWithParameters
 
class  RangeTracker
 
class  RankLayer
 
struct  RankQueueDescriptor
 
struct  ReduceDescriptor
 A ReduceDescriptor for the REDUCE operators. More...
 
class  ReduceLayer
 This layer represents a reduction operation. More...
 
struct  ReduceQueueDescriptor
 
class  RefActivationWorkload
 
class  RefArgMinMaxWorkload
 
class  RefBackend
 
class  RefBaseWorkload
 
class  RefBatchNormalizationWorkload
 
class  RefBatchToSpaceNdWorkload
 
class  RefCastWorkload
 
class  RefChannelShuffleWorkload
 
class  RefComparisonWorkload
 
class  RefConcatWorkload
 
class  RefConstantWorkload
 
class  RefConvertBf16ToFp32Workload
 
class  RefConvertFp16ToFp32Workload
 
class  RefConvertFp32ToBf16Workload
 
class  RefConvertFp32ToFp16Workload
 
class  RefConvolution2dWorkload
 
class  RefConvolution3dWorkload
 
class  RefDebugWorkload
 
class  RefDepthToSpaceWorkload
 
class  RefDepthwiseConvolution2dWorkload
 
class  RefDequantizeWorkload
 
class  RefDetectionPostProcessWorkload
 
class  RefElementwiseUnaryWorkload
 
class  RefElementwiseWorkload
 
class  RefFakeQuantizationFloat32Workload
 
class  RefFillWorkload
 
class  RefFloorWorkload
 
class  RefFullyConnectedWorkload
 
class  RefGatherNdWorkload
 
class  RefGatherWorkload
 
class  RefInstanceNormalizationWorkload
 
class  RefL2NormalizationWorkload
 
class  RefLayerSupport
 
class  RefLogicalBinaryWorkload
 
class  RefLogicalUnaryWorkload
 
class  RefLogSoftmaxWorkload
 
class  RefLstmWorkload
 
class  RefMeanWorkload
 
class  RefMemoryManager
 
class  RefNormalizationWorkload
 
class  RefPadWorkload
 
class  RefPermuteWorkload
 
class  RefPooling2dWorkload
 
class  RefPooling3dWorkload
 
class  RefPreluWorkload
 
class  RefQLstmWorkload
 
class  RefQuantizeWorkload
 
struct  RefRankWorkload
 
class  RefReduceWorkload
 
class  RefReshapeWorkload
 
class  RefResizeWorkload
 
struct  RefShapeWorkload
 
class  RefSliceWorkload
 
class  RefSoftmaxWorkload
 
class  RefSpaceToBatchNdWorkload
 
class  RefSpaceToDepthWorkload
 
class  RefSplitterWorkload
 
class  RefStackWorkload
 
class  RefStridedSliceWorkload
 
class  RefTensorHandle
 
class  RefTensorHandleFactory
 
class  RefTransposeConvolution2dWorkload
 
class  RefTransposeWorkload
 
class  RefUnidirectionalSequenceLstmWorkload
 
class  RefWorkloadFactory
 
struct  ReshapeDescriptor
 A ReshapeDescriptor for the ReshapeLayer. More...
 
class  ReshapeLayer
 This layer represents a reshape operation. More...
 
struct  ReshapeQueueDescriptor
 
struct  ResizeDescriptor
 A ResizeDescriptor for the ResizeLayer. More...
 
class  ResizeLayer
 This layer represents a resize operation. More...
 
struct  ResizeQueueDescriptor
 
struct  ResolveTypeImpl
 
struct  ResolveTypeImpl< DataType::BFloat16 >
 
struct  ResolveTypeImpl< DataType::Boolean >
 
struct  ResolveTypeImpl< DataType::Float16 >
 
struct  ResolveTypeImpl< DataType::Float32 >
 
struct  ResolveTypeImpl< DataType::QAsymmS8 >
 
struct  ResolveTypeImpl< DataType::QAsymmU8 >
 
struct  ResolveTypeImpl< DataType::QSymmS16 >
 
struct  ResolveTypeImpl< DataType::QSymmS8 >
 
struct  ResolveTypeImpl< DataType::Signed32 >
 
struct  ResolveTypeImpl< DataType::Signed64 >
 
struct  rsqrt
 
class  RsqrtLayer
 
struct  RsqrtQueueDescriptor
 
struct  Rule
 
class  RuntimeException
 
struct  RuntimeImpl
 
class  ScaledInt32Decoder
 
class  ScaledInt32PerAxisDecoder
 
class  ScopedProfilingEvent
 
struct  ScopedRecord
 
class  ScopedTensorHandle
 
class  ShapeLayer
 
struct  ShapeQueueDescriptor
 
struct  ShapesAreBroadcastCompatible
 
struct  ShapesAreSameRank
 
struct  ShapesAreSameTotalSize
 
class  SimpleLogger
 
struct  sin
 
class  SingleAxisPriorityList
 SingleAxisPriorityList sorts the MemBlocks according to some priority, then tries to place them into as few bins as possible. More...
 
struct  SliceDescriptor
 A SliceDescriptor for the SliceLayer. More...
 
class  SliceLayer
 
struct  SliceQueueDescriptor
 
struct  SoftmaxDescriptor
 A SoftmaxDescriptor for the SoftmaxLayer. More...
 
class  SoftmaxLayer
 This layer represents a softmax operation. More...
 
struct  SoftmaxQueueDescriptor
 
struct  SpaceToBatchNdDescriptor
 A SpaceToBatchNdDescriptor for the SpaceToBatchNdLayer. More...
 
class  SpaceToBatchNdLayer
 This layer represents a SpaceToBatchNd operation. More...
 
struct  SpaceToBatchNdQueueDescriptor
 
struct  SpaceToDepthDescriptor
 A SpaceToDepthDescriptor for the SpaceToDepthLayer. More...
 
class  SpaceToDepthLayer
 This layer represents a SpaceToDepth operation. More...
 
struct  SpaceToDepthQueueDescriptor
 
class  SplitterLayer
 This layer represents a split operation. More...
 
struct  SplitterQueueDescriptor
 
struct  sqrt
 
struct  StackDescriptor
 A StackDescriptor for the StackLayer. More...
 
class  StackLayer
 This layer represents a stack operation. More...
 
struct  StackQueueDescriptor
 
class  StandardOutputSink
 
struct  StandInDescriptor
 A StandInDescriptor for the StandIn layer. More...
 
class  StandInLayer
 This layer represents an unknown operation in the input graph. More...
 
class  StrategyBase
 Strategy base class with empty implementations. More...
 
struct  StrategyFactory
 
class  StrategyValidator
 
struct  StridedSliceDescriptor
 A StridedSliceDescriptor for the StridedSliceLayer. More...
 
class  StridedSliceLayer
 This layer represents a strided slice operation. More...
 
struct  StridedSliceQueueDescriptor
 
struct  StringifyLayerParameters
 StringifyLayerParameters allows serializing layer parameters to string. More...
 
struct  StringifyLayerParameters< ActivationDescriptor >
 
struct  StringifyLayerParameters< BatchNormalizationDescriptor >
 
struct  StringifyLayerParameters< BatchToSpaceNdDescriptor >
 
struct  StringifyLayerParameters< ChannelShuffleDescriptor >
 
struct  StringifyLayerParameters< ComparisonDescriptor >
 
struct  StringifyLayerParameters< Convolution2dDescriptor >
 
struct  StringifyLayerParameters< Convolution3dDescriptor >
 
struct  StringifyLayerParameters< DepthwiseConvolution2dDescriptor >
 
struct  StringifyLayerParameters< DetectionPostProcessDescriptor >
 
struct  StringifyLayerParameters< ElementwiseUnaryDescriptor >
 
struct  StringifyLayerParameters< FakeQuantizationDescriptor >
 
struct  StringifyLayerParameters< FullyConnectedDescriptor >
 
struct  StringifyLayerParameters< L2NormalizationDescriptor >
 
struct  StringifyLayerParameters< LstmDescriptor >
 
struct  StringifyLayerParameters< MeanDescriptor >
 
struct  StringifyLayerParameters< NormalizationDescriptor >
 
struct  StringifyLayerParameters< OriginsDescriptor >
 
struct  StringifyLayerParameters< PadDescriptor >
 
struct  StringifyLayerParameters< PermuteDescriptor >
 
struct  StringifyLayerParameters< Pooling2dDescriptor >
 
struct  StringifyLayerParameters< Pooling3dDescriptor >
 
struct  StringifyLayerParameters< PreCompiledDescriptor >
 
struct  StringifyLayerParameters< ReduceDescriptor >
 
struct  StringifyLayerParameters< ReshapeDescriptor >
 
struct  StringifyLayerParameters< ResizeDescriptor >
 
struct  StringifyLayerParameters< SoftmaxDescriptor >
 
struct  StringifyLayerParameters< SpaceToBatchNdDescriptor >
 
struct  StringifyLayerParameters< SpaceToDepthDescriptor >
 
struct  StringifyLayerParameters< StackDescriptor >
 
struct  StringifyLayerParameters< StridedSliceDescriptor >
 
struct  StringifyLayerParameters< TransposeConvolution2dDescriptor >
 
struct  StringifyLayerParameters< TransposeDescriptor >
 
struct  StringifyLayerParameters< ViewsDescriptor >
 
struct  StringMapping
 StringMapping is a helper class that allows strings to be used as template parameters, simplifying code which differs only in a string, such as a debug string literal. More...
 
class  SubgraphView
 The SubgraphView class represents a subgraph of a Graph. More...
 
class  SubgraphViewSelector
 Algorithm that splits a Graph into Subgraphs based on a filtering of layers (e.g. More...
 
class  SubtractionLayer
 This layer represents a subtraction operation. More...
 
struct  SubtractionQueueDescriptor
 
class  SwitchLayer
 This layer calculates both true and false outputs for input. More...
 
struct  SwitchQueueDescriptor
 
class  SyncMemGenericWorkload
 
class  Tensor
 A tensor defined by a TensorInfo (shape and data type) and a mutable backing store. More...
 
class  TensorBufferArrayView
 
class  TensorHandle
 
class  TensorHandleFactoryRegistry
 
class  TensorInfo
 
struct  TensorMemory
 
struct  TensorNumDimensionsAreCorrect
 
class  TensorShape
 
class  TestBatchNormalizationLayerVisitor
 
class  TestConstantLayerVisitor
 
class  TestConvolution2dLayerVisitor
 
class  TestDepthwiseConvolution2dLayerVisitor
 
class  TestFullyConnectedLayerVistor
 
class  TestInputLayerVisitor
 
class  TestLayerVisitor
 
class  TestLstmLayerVisitor
 
class  TestOutputLayerVisitor
 
class  TestQLstmLayerVisitor
 
class  TestQuantizedLstmLayerVisitor
 
class  TestStrategy
 
struct  ThrowingStrategy
 
class  TimeoutException
 
class  TransformIterator
 
struct  TransposeConvolution2dDescriptor
 A TransposeConvolution2dDescriptor for the TransposeConvolution2dLayer. More...
 
class  TransposeConvolution2dLayer
 This layer represents a 2D transpose convolution operation. More...
 
struct  TransposeConvolution2dQueueDescriptor
 
struct  TransposeDescriptor
 A TransposeDescriptor for the TransposeLayer. More...
 
class  TransposeLayer
 This layer represents a transpose operation. More...
 
struct  TransposeQueueDescriptor
 
struct  TypeAnyOf
 
class  TypedIterator
 
class  TypedWorkload
 
struct  TypeIs
 
struct  TypeNotPerAxisQuantized
 
struct  TypesAreEqual
 
class  UnidirectionalSequenceLstmLayer
 This layer represents a unidirectional sequence LSTM operation. More...
 
struct  UnidirectionalSequenceLstmQueueDescriptor
 
class  UnimplementedException
 
class  UnmapLayer
 This layer represents a memory unmap operation. More...
 
struct  UnmapQueueDescriptor
 
class  UnmapWorkload
 
struct  ViewsDescriptor
 A ViewsDescriptor for the SplitterLayer. More...
 
struct  VisitorNoThrowPolicy
 
struct  VisitorThrowingPolicy
 
class  WallClockTimer
 
class  WorkloadDataCollector
 
class  WorkloadFactoryBase
 
struct  WorkloadInfo
 Contains information about TensorInfos of a layer. More...
 

Typedefs

using BackendIdVector = std::vector< BackendId >
 
using BackendIdSet = std::unordered_set< BackendId >
 
using NetworkOptions = std::vector< BackendOptions >
 
using ModelOptions = std::vector< BackendOptions >
 
using BackendCapabilities = BackendOptions
 
using IBackendInternalUniquePtr = std::unique_ptr< IBackendInternal >
 
using MemoryOptimizerStrategiesMapRef = std::unordered_map< BackendId, std::shared_ptr< IMemoryOptimizerStrategy > >
 
using DynamicBackendPtr = std::unique_ptr< DynamicBackend >
 
using IBackendContextUniquePtr = std::unique_ptr< IBackendContext >
 
using ILayerSupportSharedPtr = std::shared_ptr< ILayerSupport >
 
using IMemoryManagerUniquePtr = std::unique_ptr< IMemoryManager >
 
using instead = ConstTensorHandle
 
template<typename QueueDescriptor >
using FloatWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Uint8Workload = TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 >
 
template<typename QueueDescriptor >
using Int32Workload = TypedWorkload< QueueDescriptor, armnn::DataType::Signed32 >
 
template<typename QueueDescriptor >
using BooleanWorkload = TypedWorkload< QueueDescriptor, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BaseFloat32ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BaseUint8ComparisonWorkload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Boolean >
 
template<typename QueueDescriptor >
using BFloat16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::BFloat16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32ToBFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::BFloat16 >
 
template<typename QueueDescriptor >
using Float16ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 >
 
template<typename QueueDescriptor >
using Float32ToFloat16Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::Float32, armnn::DataType::Float16 >
 
template<typename QueueDescriptor >
using Uint8ToFloat32Workload = MultiTypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8, armnn::DataType::Float32 >
 
using InputQueueDescriptor = MemCopyQueueDescriptor
 
using OutputQueueDescriptor = MemCopyQueueDescriptor
 
using MergerQueueDescriptor = ConcatQueueDescriptor
 
using LogSoftmaxDescriptor = SoftmaxDescriptor
 A LogSoftmaxDescriptor for the LogSoftmaxLayer. More...
 
using DepthToSpaceDescriptor = SpaceToDepthDescriptor
 A DepthToSpaceDescriptor for the DepthToSpaceLayer. More...
 
using UnidirectionalSequenceLstmDescriptor = LstmDescriptor
 
using ConcatDescriptor = OriginsDescriptor
 
using MergerDescriptor = OriginsDescriptor
 MergerDescriptor is deprecated, use ConcatDescriptor instead. More...
 
using SplitterDescriptor = ViewsDescriptor
 
using INetworkPtr = std::unique_ptr< INetwork, void(*)(INetwork *network)>
 
using IOptimizedNetworkPtr = std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)>
 
using CompiledBlobDeleter = std::function< void(const void *)>
 
using CompiledBlobPtr = std::unique_ptr< void, CompiledBlobDeleter >
 
using NetworkId = int
 
using IRuntimePtr = std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)>
 
using IGpuAccTunedParametersPtr = std::shared_ptr< IGpuAccTunedParameters >
 The following API is replaced by the backend options API. More...
 
using MemorySourceFlags = unsigned int
 
using BindingPointInfo = std::pair< armnn::LayerBindingId, armnn::TensorInfo >
 
using InputTensors = std::vector< std::pair< LayerBindingId, class ConstTensor > >
 
using OutputTensors = std::vector< std::pair< LayerBindingId, class Tensor > >
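As a rough sketch, input and output tensors are bound by LayerBindingId before enqueueing a workload (runtime, netId and the data buffers are assumed to exist; recent releases also expect the input TensorInfo to be marked constant):

    armnn::TensorInfo inputInfo = runtime->GetInputTensorInfo(netId, 0);
    inputInfo.SetConstant(true);
    armnn::InputTensors  inputTensors  { { 0, armnn::ConstTensor(inputInfo, inputData.data()) } };
    armnn::OutputTensors outputTensors { { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) } };
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);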
 
using IBackendSharedPtr = std::shared_ptr< IBackend >
 
using IBackendUniquePtr = std::unique_ptr< IBackend, void(*)(IBackend *backend)>
 
using LayerBindingId = int
 Type of identifiers for bindable layers (inputs, outputs). More...
 
using ImportedInputId = unsigned int
 
using ImportedOutputId = unsigned int
 
using DebugCallbackFunction = std::function< void(LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle)>
 Define the type of callback for the Debug layer to call. More...
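A short sketch of registering such a callback on the runtime (this assumes IRuntime::RegisterDebugCallback and a network loaded with debug enabled):

    runtime->RegisterDebugCallback(netId,
        [](armnn::LayerGuid /*guid*/, unsigned int slotIndex, armnn::ITensorHandle* tensorHandle)
        {
            // Inspect the intermediate output here, e.g. its element count.
            std::cout << "Debug output, slot " << slotIndex << ": "
                      << tensorHandle->GetShape().GetNumElements() << " elements\n";
        });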
 
using HighResolutionClock = std::chrono::high_resolution_clock::time_point
 Define a timer and associated inference ID for recording execution times. More...
 
using InferenceTimingPair = std::pair< HighResolutionClock, HighResolutionClock >
 
using TensorInfos = std::vector< TensorInfo >
 
using WorkloadQueue = std::vector< std::unique_ptr< IWorkload > >
 
using Coordinates = std::array< unsigned int, MaxNumOfTensorDimensions >
 
using Dimensions = std::array< unsigned int, MaxNumOfTensorDimensions >
 
using LayerPriority = unsigned int
 
using AdditionalInfoObjectPtr = std::shared_ptr< void >
 
using PreCompiledObjectDeleter = std::function< void(const void *)>
 
using PreCompiledObjectPtr = std::unique_ptr< void, PreCompiledObjectDeleter >
 
template<LayerType Type>
using LayerTypeOf = typename LayerTypeOfImpl< Type >::Type
 
using NetworkImplPtr = std::unique_ptr< NetworkImpl, void(*)(NetworkImpl *network)>
 
using BackendsMap = std::map< BackendId, std::unique_ptr< class IBackendInternal > >
 
template<DataType DT>
using ResolveType = typename ResolveTypeImpl< DT >::Type
 
using LoadedNetworks = std::unordered_map< NetworkId, std::unique_ptr< LoadedNetwork > >
 
using IReportStructure = arm::pipe::IReportStructure
 
using IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService
 
using ParameterStringifyFunction = std::function< void(const std::string &name, const std::string &value)>
 
using FactoryId = ITensorHandleFactory::FactoryId
 
using Half = half_float::half
 
using CopyAndImportFactoryPairs = std::map< ITensorHandleFactory::FactoryId, ITensorHandleFactory::FactoryId >
 
using ACLMemManagerOnDemand = std::shared_ptr< arm_compute::MemoryManagerOnDemand >
 
using RefDebugBFloat16Workload = RefDebugWorkload< DataType::BFloat16 >
 
using RefDebugFloat16Workload = RefDebugWorkload< DataType::Float16 >
 
using RefDebugFloat32Workload = RefDebugWorkload< DataType::Float32 >
 
using RefDebugQAsymmU8Workload = RefDebugWorkload< DataType::QAsymmU8 >
 
using RefDebugQAsymmS8Workload = RefDebugWorkload< DataType::QAsymmS8 >
 
using RefDebugQSymmS16Workload = RefDebugWorkload< DataType::QSymmS16 >
 
using RefDebugQSymmS8Workload = RefDebugWorkload< DataType::QSymmS8 >
 
using RefDebugSigned32Workload = RefDebugWorkload< DataType::Signed32 >
 
template<typename DataType = float>
using RefAdditionWorkload = RefElementwiseWorkload< std::plus< DataType >, AdditionQueueDescriptor, StringMapping::RefAdditionWorkload_Execute >
 
template<typename DataType = float>
using RefSubtractionWorkload = RefElementwiseWorkload< std::minus< DataType >, SubtractionQueueDescriptor, StringMapping::RefSubtractionWorkload_Execute >
 
template<typename DataType = float>
using RefMultiplicationWorkload = RefElementwiseWorkload< std::multiplies< DataType >, MultiplicationQueueDescriptor, StringMapping::RefMultiplicationWorkload_Execute >
 
template<typename DataType = float>
using RefDivisionWorkload = RefElementwiseWorkload< std::divides< DataType >, DivisionQueueDescriptor, StringMapping::RefDivisionWorkload_Execute >
 
template<typename DataType = float>
using RefMaximumWorkload = RefElementwiseWorkload< armnn::maximum< DataType >, MaximumQueueDescriptor, StringMapping::RefMaximumWorkload_Execute >
 
template<typename DataType = float>
using RefMinimumWorkload = RefElementwiseWorkload< armnn::minimum< DataType >, MinimumQueueDescriptor, StringMapping::RefMinimumWorkload_Execute >
 
using RefPermuteBFloat16Workload = RefPermuteWorkload< DataType::BFloat16 >
 
using RefPermuteFloat16Workload = RefPermuteWorkload< DataType::Float16 >
 
using RefPermuteFloat32Workload = RefPermuteWorkload< DataType::Float32 >
 
using RefPermuteQAsymmS8Workload = RefPermuteWorkload< DataType::QAsymmS8 >
 
using RefPermuteQAsymm8Workload = RefPermuteWorkload< DataType::QAsymmU8 >
 
using RefPermuteQSymm16Workload = RefPermuteWorkload< DataType::QSymmS16 >
 
using RefTransposeBFloat16Workload = RefTransposeWorkload< DataType::BFloat16 >
 
using RefTransposeFloat16Workload = RefTransposeWorkload< DataType::Float16 >
 
using RefTransposeFloat32Workload = RefTransposeWorkload< DataType::Float32 >
 
using RefTransposeQAsymmS8Workload = RefTransposeWorkload< DataType::QAsymmS8 >
 
using RefTransposeQAsymm8Workload = RefTransposeWorkload< DataType::QAsymmU8 >
 
using RefTransposeQSymm16Workload = RefTransposeWorkload< DataType::QSymmS16 >
 

Enumerations

enum  Compute { Undefined = 0, CpuRef = 1, CpuAcc = 2, GpuAcc = 3 }
 The Compute enum is deprecated and is being replaced by BackendId. More...
 
enum  CapabilityClass { PaddingRequired = 1, FallbackImportDisabled = 2, CapabilityClassMax = 254 }
 Capability class used by the GetCapabilities function so that only capabilities within the given scope are chosen for calculation. More...
 
enum  EdgeStrategy { Undefined, DirectCompatibility, ExportToTarget, CopyToTarget }
 
enum  BoostLogSeverityMapping {
  trace, debug, info, warning,
  error, fatal
}
 
enum  Status { Success = 0, Failure = 1 }
 enumeration More...
 
enum  DataType {
  Float16 = 0, Float32 = 1, QAsymmU8 = 2, Signed32 = 3,
  Boolean = 4, QSymmS16 = 5, QSymmS8 = 6, QAsymmS8 = 7,
  BFloat16 = 8, Signed64 = 9
}
 
enum  DataLayout { NCHW = 1, NHWC = 2, NDHWC = 3, NCDHW = 4 }
 
enum  ProfilingDetailsMethod { Undefined = 0, DetailsWithEvents = 1, DetailsOnly = 2 }
 Define the behaviour of the internal profiler when outputting network details. More...
 
enum  QosExecPriority { Low = 0, Medium = 1, High = 2 }
 
enum  ActivationFunction {
  Sigmoid = 0, TanH = 1, Linear = 2, ReLu = 3,
  BoundedReLu = 4, SoftReLu = 5, LeakyReLu = 6, Abs = 7,
  Sqrt = 8, Square = 9, Elu = 10, HardSwish = 11
}
 
enum  ArgMinMaxFunction { Min = 0, Max = 1 }
 
enum  ComparisonOperation {
  Equal = 0, Greater = 1, GreaterOrEqual = 2, Less = 3,
  LessOrEqual = 4, NotEqual = 5
}
 
enum  LogicalBinaryOperation { LogicalAnd = 0, LogicalOr = 1 }
 
enum  UnaryOperation {
  Abs = 0, Exp = 1, Sqrt = 2, Rsqrt = 3,
  Neg = 4, LogicalNot = 5, Log = 6, Sin = 7
}
 
enum  PoolingAlgorithm { Max = 0, Average = 1, L2 = 2 }
 
enum  ReduceOperation {
  Sum = 0, Max = 1, Mean = 2, Min = 3,
  Prod = 4
}
 
enum  ResizeMethod { Bilinear = 0, NearestNeighbor = 1 }
 
enum  Dimensionality { NotSpecified = 0, Specified = 1, Scalar = 2 }
 
enum  PaddingMethod { IgnoreValue = 0, Exclude = 1 }
 The padding method modifies the output of pooling layers. More...
 
enum  PaddingMode { Constant = 0, Reflect = 1, Symmetric = 2 }
 The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect). More...
 
enum  NormalizationAlgorithmChannel { Across = 0, Within = 1 }
 
enum  NormalizationAlgorithmMethod { LocalBrightness = 0, LocalContrast = 1 }
 
enum  OutputShapeRounding { Floor = 0, Ceiling = 1 }
 
enum  ShapeInferenceMethod { ValidateOnly = 0, InferAndValidate = 1 }
 The ShapeInferenceMethod modifies how the output shapes are treated. More...
 
enum  MemorySource : uint32_t {
  Undefined = 0, Malloc = 1, DmaBuf = 2, DmaBufProtected = 4,
  Gralloc = 5
}
 Define the Memory Source to reduce copies. More...
 
enum  MemBlockStrategyType { SingleAxisPacking = 0, MultiAxisPacking = 1 }
 
enum  BackendCapability : uint32_t { NonConstWeights, AsyncExecution }
 BackendCapability class. More...
 
enum  LayerType {
  X, Activation, Addition, ArgMinMax,
  BatchNormalization, BatchToSpaceNd, Comparison, Concat,
  Constant, ConvertBf16ToFp32, ConvertFp16ToFp32, ConvertFp32ToBf16,
  ConvertFp32ToFp16, Convolution2d, Debug, DepthToSpace,
  DepthwiseConvolution2d, Dequantize, DetectionPostProcess, Division,
  ElementwiseUnary, FakeQuantization, Fill, Floor,
  FullyConnected, Gather, Input, InstanceNormalization,
  L2Normalization, LogicalBinary, LogSoftmax, Lstm,
  QLstm, Map, Maximum, Mean,
  MemCopy, MemImport, Merge, Minimum,
  Multiplication, Normalization, Output, Pad,
  Permute, Pooling2d, PreCompiled, Prelu,
  Quantize, QuantizedLstm, Reshape, Rank,
  Resize, Reduce, Slice, Softmax,
  SpaceToBatchNd, SpaceToDepth, Splitter, Stack,
  StandIn, StridedSlice, Subtraction, Switch,
  Transpose, TransposeConvolution2d, Unmap, Cast,
  Shape, UnidirectionalSequenceLstm, ChannelShuffle, Convolution3d,
  Pooling3d, GatherNd, FirstLayer = Activation, LastLayer = UnidirectionalSequenceLstm
}
 When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below. More...
 
enum  LogSeverity {
  Trace, Debug, Info, Warning,
  Error, Fatal
}
 
enum  GraphEvent { LayerAdded, LayerErased }
 
enum  JsonObjectType { Measurement, Event, ExecObjectDesc }
 
enum  TuningLevel { None, Rapid, Normal, Exhaustive }
 

Functions

LayerSupportHandle GetILayerSupportByBackendId (const armnn::BackendId &backend)
 Convenience function to retrieve the LayerSupportHandle for a backend. More...
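A brief sketch of querying layer support through the returned handle (inputInfo, outputInfo and activationDesc are assumed to exist; the exact IsXxxSupported overloads vary by release, so treat this as illustrative):

    armnn::LayerSupportHandle handle = armnn::GetILayerSupportByBackendId(armnn::BackendId("CpuAcc"));
    std::string reason;
    bool supported = handle.IsActivationSupported(inputInfo, outputInfo, activationDesc,
                                                  armnn::Optional<std::string&>(reason));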
 
bool HasCapability (const std::string &name, const BackendCapabilities &capabilities)
 Convenience function to check if a capability exists in a BackendCapabilities struct. More...
 
bool HasCapability (const std::string &name, const armnn::BackendId &backend)
 Convenience function to check if a capability exists in a backend. More...
 
bool HasCapability (const BackendOptions::BackendOption &capability, const BackendCapabilities &capabilities)
 Convenience function to check if a given capability matches a capability in a BackendCapabilities struct. More...
 
bool HasCapability (const BackendOptions::BackendOption &backendOption, const armnn::BackendId &backend)
 Convenience function to check if a given capability matches a capability in a backend. More...
 
Optional< const BackendOptions::BackendOptionGetCapability (const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
 Returns a BackendCapability if the backend lists the capability; the returned BackendCapability must then be inspected to check whether it is supported. Returns an EmptyOptional if the BackendCapability is unlisted. More...
 
Optional< const BackendOptions::BackendOptionGetCapability (const std::string &backendCapabilityName, const armnn::BackendId &backend)
 Returns a BackendCapability if the backend lists the capability; the returned BackendCapability must then be inspected to check whether it is supported. Returns an EmptyOptional if the BackendCapability is unlisted. More...
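For example, a capability can be checked roughly as follows (the capability and backend names are illustrative):

    if (armnn::HasCapability("NonConstWeights", armnn::BackendId("GpuAcc")))
    {
        auto capability = armnn::GetCapability("NonConstWeights", armnn::BackendId("GpuAcc"));
        if (capability.has_value() && capability.value().GetValue().AsBool())
        {
            // The backend reports support for non-constant weights.
        }
    }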
 
bool IsCapabilitySupported (const armnn::BackendId &backend, armnn::BackendCapability capability)
 Convenience function to check a capability on a backend. More...
 
unsigned int GetNumberOfCacheFiles (const armnn::BackendId &backend)
 Returns the number of cached files if backend supports caching. More...
 
constexpr char const * GetComputeDeviceAsCString (Compute compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const std::vector< Compute > &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const std::set< Compute > &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const Compute &compute)
 Deprecated function that will be removed together with the Compute enum. More...
 
std::ostream & operator<< (std::ostream &os, const BackendId &id)
 
template<template< typename... > class TContainer, typename... TContainerTemplateArgs>
std::ostream & operator<< (std::ostream &os, const TContainer< BackendId, TContainerTemplateArgs... > &ids)
 
template<typename F >
void ParseOptions (const std::vector< BackendOptions > &options, BackendId backend, F f)
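A rough sketch of iterating the options registered for one backend (networkOptions is assumed to be a std::vector<BackendOptions>; the option name is illustrative):

    armnn::ParseOptions(networkOptions, armnn::BackendId("GpuAcc"),
        [](const std::string& name, const armnn::BackendOptions::Var& value)
        {
            if (name == "TuningLevel")
            {
                int tuningLevel = armnn::ParseIntBackendOption(value, 0);
                // ... use tuningLevel
            }
        });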
 
bool ParseBooleanBackendOption (const armnn::BackendOptions::Var &value, bool defaultValue)
 
std::string ParseStringBackendOption (const armnn::BackendOptions::Var &value, std::string defaultValue)
 
int ParseIntBackendOption (const armnn::BackendOptions::Var &value, int defaultValue)
 
BackendRegistryBackendRegistryInstance ()
 
std::ostream & operator<< (std::ostream &os, const BackendVersion &backendVersion)
 
TensorShape GetUnpaddedTensorStrides (const TensorInfo &tensorInfo)
 
DataType GetBiasDataType (DataType inputDataType)
 
ARMNN_NO_DEPRECATE_WARN_BEGIN struct ARMNN_DEPRECATED_MSG_REMOVAL_DATE ("ResizeBilinearQueueDescriptor is deprecated use ResizeQueueDescriptor instead", "22.08") ResizeBilinearQueueDescriptor
 
template<typename TensorShapeIt >
OriginsDescriptor CreateDescriptorForConcatenation (TensorShapeIt first, TensorShapeIt last, unsigned int concatenationDimension)
 Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors. More...
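A minimal sketch, concatenating two 4-D inputs along dimension 1 (the network pointer is assumed to exist):

    std::vector<armnn::TensorShape> inputShapes = { { 1, 3, 4, 4 }, { 1, 5, 4, 4 } };
    armnn::OriginsDescriptor concatDesc =
        armnn::CreateDescriptorForConcatenation(inputShapes.begin(), inputShapes.end(), 1);
    armnn::IConnectableLayer* concat = network->AddConcatLayer(concatDesc, "concat");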
 
template<typename ExceptionType >
void ConditionalThrow (bool condition, const std::string &message)
 
template<typename ExceptionType >
void ConditionalThrow (bool condition)
 
template<typename ExceptionType , typename ComparedType >
void ConditionalThrowIfNotEqual (const std::string &message, const ComparedType &leftHandSide, const ComparedType &rightHandSide)
 ComparedType must support: operator==(const ComparedType&) and operator<<(ostream&, const ComparedType&). More...
 
class ARMNN_DEPRECATED_MSG_REMOVAL_DATE ("Use ABI stable IStrategy instead.", "22.05") ILayerVisitor
 
IOptimizedNetworkPtr Optimize (const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
 Create an optimized version of the network. More...
 
IOptimizedNetworkPtr Optimize (const Graph &inGraph, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options, Optional< std::vector< std::string > &> messages=EmptyOptional())
 Create an optimized version of the network. More...
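A minimal sketch of the usual optimize-and-load flow (the network is assumed to have been built already):

    armnn::IRuntime::CreationOptions creationOptions;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(creationOptions);
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(
        *network,
        { armnn::Compute::CpuAcc, armnn::Compute::CpuRef }, // backend preference order
        runtime->GetDeviceSpec());
    armnn::NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optimizedNet));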
 
bool IsActivationSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsAdditionSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsBatchNormalizationSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsBatchToSpaceNdSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConcatSupported (const BackendId &backend, const std::vector< const TensorInfo *> inputs, const TensorInfo &output, const OriginsDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConstantSupported (const BackendId &backend, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConvertFp16ToFp32Supported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConvertFp32ToFp16Supported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsConvolution2dSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsDebugSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsDepthwiseConvolutionSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsDequantizeSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsDivisionSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsEqualSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsFakeQuantizationSupported (const BackendId &backend, const TensorInfo &input, const FakeQuantizationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsFloorSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsFullyConnectedSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const TensorInfo &biases, const FullyConnectedDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsGreaterSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsInputSupported (const BackendId &backend, const TensorInfo &input, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsL2NormalizationSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsLstmSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMaximumSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnSupported=nullptr, size_t reasonIfUnSupportedMaxLength=0)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMeanSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMemCopySupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMergeSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMinimumSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsMultiplicationSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsNormalizationSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsOutputSupported (const BackendId &backend, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPadSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPermuteSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPreCompiledSupported (const BackendId &backend, const TensorInfo &input, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPreluSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsPooling2dSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsQuantizedLstmSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &previousCellStateIn, const TensorInfo &previousOutputIn, const TensorInfo &cellStateOut, const TensorInfo &output, const QuantizedLstmInputParamsInfo &paramsInfo, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsReduceSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsReshapeSupported (const BackendId &backend, const TensorInfo &input, const ReshapeDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsResizeSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsRsqrtSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSoftmaxSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSpaceToBatchNdSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSpaceToDepthSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSplitterSupported (const BackendId &backend, const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, const ViewsDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsStackSupported (const BackendId &backend, const std::vector< const TensorInfo *> inputs, const TensorInfo &output, const StackDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsStridedSliceSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSubtractionSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsSwitchSupported (const BackendId &backend, const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output0, const TensorInfo &output1, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
bool IsTransposeConvolution2dSupported (const BackendId &backend, const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, char *reasonIfUnsupported=nullptr, size_t reasonIfUnsupportedMaxLength=1024)
 Deprecated in favor of IBackend and ILayerSupport interfaces. More...
 
std::string LevelToString (LogSeverity level)
 
LogSeverity StringToLogLevel (std::string level)
 
void SetLogFilter (LogSeverity level)
 
void SetAllLoggingSinks (bool standardOut, bool debugOut, bool coloured)
 
constexpr LogSeverity ConvertLogSeverity (BoostLogSeverityMapping severity)
 
template<typename Arg , typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags Combine (Arg sourceA, Arg sourceB)
 
template<typename Arg , typename ... Args, typename std::enable_if< IsMemorySource< Arg >::value >::type * = nullptr>
MemorySourceFlags Combine (Arg source, Args... rest)
 
bool CheckFlag (MemorySourceFlags flags, MemorySource source)
 
template<typename T , class... Args>
Optional< T > MakeOptional (Args &&... args)
 Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object. More...
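 For example, constructing a TensorInfo in place (the shape and data type are illustrative):
 
     armnn::Optional<armnn::TensorInfo> biasInfo =
         armnn::MakeOptional<armnn::TensorInfo>(armnn::TensorShape({16}), armnn::DataType::Float32);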
 
const char * GetLayerTypeAsCString (LayerType type)
 
constexpr char const * GetStatusAsCString (Status status)
 
constexpr char const * GetActivationFunctionAsCString (ActivationFunction activation)
 
constexpr char const * GetArgMinMaxFunctionAsCString (ArgMinMaxFunction function)
 
constexpr char const * GetComparisonOperationAsCString (ComparisonOperation operation)
 
constexpr char const * GetUnaryOperationAsCString (UnaryOperation operation)
 
constexpr char const * GetLogicalBinaryOperationAsCString (LogicalBinaryOperation operation)
 
constexpr char const * GetPoolingAlgorithmAsCString (PoolingAlgorithm pooling)
 
constexpr char const * GetOutputShapeRoundingAsCString (OutputShapeRounding rounding)
 
constexpr char const * GetPaddingMethodAsCString (PaddingMethod method)
 
constexpr char const * GetPaddingModeAsCString (PaddingMode mode)
 
constexpr char const * GetReduceOperationAsCString (ReduceOperation reduce_operation)
 
constexpr unsigned int GetDataTypeSize (DataType dataType)
 
template<unsigned N>
constexpr bool StrEqual (const char *strA, const char(&strB)[N])
 
constexpr armnn::Compute ParseComputeDevice (const char *str)
 Deprecated function that will be removed together with the Compute enum. More...
 
constexpr const char * GetDataTypeName (DataType dataType)
 
constexpr const char * GetDataLayoutName (DataLayout dataLayout)
 
constexpr const char * GetNormalizationAlgorithmChannelAsCString (NormalizationAlgorithmChannel channel)
 
constexpr const char * GetNormalizationAlgorithmMethodAsCString (NormalizationAlgorithmMethod method)
 
constexpr const char * GetResizeMethodAsCString (ResizeMethod method)
 
constexpr const char * GetMemBlockStrategyTypeName (MemBlockStrategyType memBlockStrategyType)
 
template<typename T >
constexpr bool IsQuantizedType ()
 
constexpr bool IsQuantized8BitType (DataType dataType)
 
constexpr bool IsQuantizedType (DataType dataType)
 
std::ostream & operator<< (std::ostream &os, Status stat)
 
std::ostream & operator<< (std::ostream &os, const armnn::TensorShape &shape)
 
template<typename QuantizedType >
QuantizedType Quantize (float value, float scale, int32_t offset)
 Quantize a floating point data type into an 8-bit data type. More...
 
template<typename QuantizedType >
float Dequantize (QuantizedType value, float scale, int32_t offset)
 Dequantize an 8-bit data type into a floating point data type. More...
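 A round-trip sketch using the unsigned 8-bit quantized type (the scale and offset are illustrative):
 
     float scale = 0.05f;
     int32_t offset = 128;
     uint8_t quantized = armnn::Quantize<uint8_t>(1.25f, scale, offset);
     float restored = armnn::Dequantize<uint8_t>(quantized, scale, offset);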
 
void VerifyTensorInfoDataType (const armnn::TensorInfo &info, armnn::DataType dataType)
 
template<typename ... Ts>
void IgnoreUnused (Ts &&...)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_floating_point< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Source >::value &&std::is_signed< Dest >::value &&std::is_integral< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Source >::value &&std::is_integral< Source >::value &&std::is_floating_point< Dest >::value, Dest > numeric_cast (Source source)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_signed< Dest >::value &&std::is_integral< Dest >::value &&std::is_unsigned< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_floating_point< Dest >::value &&std::is_unsigned< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_signed< Source >::value &&std::is_integral< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename Dest , typename Source >
std::enable_if_t< std::is_unsigned< Dest >::value &&std::is_floating_point< Source >::value, Dest > numeric_cast (Source sValue)
 
template<typename DestType , typename SourceType >
DestType PolymorphicDowncast (SourceType *value)
 Polymorphic downcast for built-in pointers only. More...
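 A minimal sketch, assuming the layer is known to be an ActivationLayer (GetSomeLayer is a hypothetical helper):
 
     armnn::Layer* layer = GetSomeLayer();
     // The downcast is assert-checked in debug builds; the destination type must be a pointer type.
     auto* activation = armnn::PolymorphicDowncast<armnn::ActivationLayer*>(layer);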
 
template<typename DestType , typename SourceType >
auto PolymorphicPointerDowncast (const SourceType &value)
 Polymorphic downcast for shared pointers and built-in pointers. More...
 
std::chrono::high_resolution_clock::time_point GetTimeNow ()
 
std::chrono::duration< double, std::milli > GetTimeDuration (std::chrono::high_resolution_clock::time_point start_time)
 
template<typename Function , typename Iterator >
constexpr TransformIterator< Function, Iterator > MakeTransformIterator (Iterator i, Function f)
 
void ConfigureLogging (bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity)
 Configures the logging behaviour of the ARMNN library. More...
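 For example, logging Info level and above to standard output only:
 
     // printToStandardOutput = true, printToDebugOutput = false, severity threshold = Info.
     armnn::ConfigureLogging(true, false, armnn::LogSeverity::Info);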
 
bool NeonDetected ()
 
const std::string GetVersion ()
 
void swap (OriginsDescriptor &first, OriginsDescriptor &second)
 
void swap (ViewsDescriptor &first, ViewsDescriptor &second)
 
uint32_t GetNumInputs (bool biasEnabled)
 
void AssertNumberOfInputSlots (Layer &layer)
 
template<typename T >
constexpr LayerType LayerEnumOf (const T *=nullptr)
 
template<>
constexpr LayerType LayerEnumOf (const ActivationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const AdditionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ArgMinMaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchNormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const BatchToSpaceNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const CastLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ChannelShuffleLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ComparisonLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConcatLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConstantLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertBf16ToFp32Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp16ToFp32Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp32ToBf16Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const ConvertFp32ToFp16Layer *)
 
template<>
constexpr LayerType LayerEnumOf (const Convolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Convolution3dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DebugLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DepthToSpaceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DepthwiseConvolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DequantizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DetectionPostProcessLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const DivisionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ElementwiseUnaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FakeQuantizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FillLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FloorLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const FullyConnectedLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const GatherLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const GatherNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const InputLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const InstanceNormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const L2NormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LogicalBinaryLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LogSoftmaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const LstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MapLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MaximumLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MeanLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MemCopyLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MemImportLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MergeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MinimumLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const MultiplicationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const NormalizationLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const OutputLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PadLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PermuteLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Pooling2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const Pooling3dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PreCompiledLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const PreluLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QuantizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const QuantizedLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const RankLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReduceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ReshapeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ResizeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const ShapeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SliceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SoftmaxLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SpaceToBatchNdLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SpaceToDepthLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SplitterLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StackLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StandInLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const StridedSliceLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SubtractionLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const SwitchLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TransposeLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const TransposeConvolution2dLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const UnidirectionalSequenceLstmLayer *)
 
template<>
constexpr LayerType LayerEnumOf (const UnmapLayer *)
 
template<typename T , typename V >
void SetValueChecked (Optional< T &> optionalRef, V &&val)
 
template<typename Float16Func , typename Float32Func , typename Uint8Func , typename Int32Func , typename BooleanFunc , typename ... Params>
bool IsSupportedForDataTypeGeneric (Optional< std::string &> reasonIfUnsupported, DataType dataType, Float16Func float16FuncPtr, Float32Func float32FuncPtr, Uint8Func uint8FuncPtr, Int32Func int32FuncPtr, BooleanFunc booleanFuncPtr, Params &&... params)
 
template<typename ... Params>
bool TrueFunc (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFunc (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncF16 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncF32 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncU8 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseFuncI32 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseInputFuncF32 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseInputFuncF16 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseOutputFuncF32 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
template<typename ... Params>
bool FalseOutputFuncF16 (Optional< std::string &> reasonIfUnsupported, Params &&... params)
 
void CopyToOutputTensor (const Tensor &outputTensor, ITensorHandle *outputTensorHandle)
 
const armnn::ConstTensor GetInputTensor (const LayerBindingId layerId, const InputTensors &inputTensors)
 
const armnn::Tensor GetOutputTensor (const LayerBindingId layerId, const OutputTensors &outputTensors)
 
template<LogSeverity Level>
void SetLoggingSinks (bool standardOut, bool debugOut, bool coloured)
 
void ReportError (const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
 
void ReportWarning (const std::string &warningMessage, Optional< std::vector< std::string > &> warningMessages)
 
OptimizationResult ReturnWithError (OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > &> errMessages)
 
bool CheckScaleSetOnQuantizedType (Layer *layer, Optional< std::vector< std::string > &> errMessages)
 
template<typename LayerT >
LayerT * ConvertBf16ToFp32Weight (Layer *l)
 
OptimizationResult AttemptBackendAssignment (BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > &> errMessages)
 
void AssignBackendsIConnectable (OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > &> errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, Graph::Iterator &firstLayer, Graph::Iterator &lastLayer, Optional< std::vector< std::string > &> errMessages)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView::IConnectableLayerIterator &firstLayer, SubgraphView::IConnectableLayerIterator &lastLayer, Optional< std::vector< std::string > &> errMessages)
 
OptimizationResult AssignBackends (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > &> errMessages)
 
BackendsMap CreateSupportedBackends (TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)
 
OptimizationResult ApplyBackendOptimizations (OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > &> errMessages)
 
bool RequiresCopy (ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)
 
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput (BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry)
 
ITensorHandleFactory::FactoryId CalculateSlotOption (BackendsMap &backends, OutputSlot &outputSlot, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
EdgeStrategy CalculateEdgeStrategy (BackendsMap &backends, ITensorHandleFactory::FactoryId srcFactoryId, const Layer &layer, const Layer &connectedLayer, TensorHandleFactoryRegistry &registry, bool importEnabled)
 
OptimizationResult SelectTensorHandleStrategy (Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, Optional< std::vector< std::string > &> errMessages)
 
std::vector< ConvertBf16ToFp32Layer * > InsertConvertBf16ToFp32LayersBefore (Graph &graph, Layer &layer, bool expectCorrectInputType)
 
std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersBefore (Graph &graph, Layer &layer, bool expectCorrectInputType)
 
std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore (Graph &graph, Layer &layer, bool expectCorrectInputType)
 
std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersAfter (Graph &graph, Layer &layer)
 
std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter (Graph &graph, Layer &layer)
 
std::vector< DebugLayer * > InsertDebugLayerAfter (Graph &graph, Layer &layer)
 
template<typename T >
void Append (Optimizer::Optimizations &optimizations, T &&optimization)
 
template<typename Front , typename... Others>
void Append (Optimizer::Optimizations &optimizations, Front &&front, Others &&... others)
 
template<typename... Args>
Optimizer::Optimizations MakeOptimizations (Args &&... args)
 
Measurement FindMeasurement (const std::string &name, const Event *event)
 
std::vector< Measurement > FindKernelMeasurements (const Event *event)
 
const Event * GetEventPtr (const Event *ptr)
 
const Event * GetEventPtr (const std::unique_ptr< Event > &ptr)
 
int CalcLevel (const Event *eventPtr)
 
void ConfigureDetailsObject (JsonChildObject &detailsObject, std::string layerDetailsStr)
 
void ExtractJsonObjects (unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event *>> descendantsMap)
 
template<typename DescriptorType >
void ProfilingUpdateDescriptions (const std::string &name, const DescriptorType &desc, const WorkloadInfo &infos, const arm::pipe::ProfilingGuid guid)
 
template<typename Delegate >
void ForEachLayerInput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
 
template<typename Delegate >
void ForEachLayerOutput (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)
 
void AssignSplitId (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)
 
bool IsReadyForSplitAssignment (LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo)
 
 TEST_SUITE ("TestConstTensorLayerVisitor")
 
size_t GetProfilerEventSequenceSize (armnn::IProfiler *profiler)
 
void RuntimeLoadedNetworksReserve (armnn::RuntimeImpl *runtime)
 
 TEST_SUITE ("TestInputOutputLayerVisitor")
 
void CheckLayerBindingId (LayerBindingId visitorId, LayerBindingId id)
 
bool IsLayerSupported (const armnn::Layer *layer)
 
bool IsLayerSupported (const armnn::Layer &layer)
 
bool IsLayerOptimizable (const armnn::Layer *layer)
 
bool IsLayerOptimizable (const armnn::Layer &layer)
 
constexpr const char * MockTensorHandleFactoryId ()
 
Graph & GetGraphForTesting (IOptimizedNetwork *optNet)
 
ModelOptions & GetModelOptionsForTesting (IOptimizedNetwork *optNet)
 
arm::pipe::IProfilingService & GetProfilingService (armnn::RuntimeImpl *runtime)
 
std::ostream & operator<< (std::ostream &os, const BFloat16 &b)
 
void ReportUntouchedLayers (OptimizationViews &optimizationViews, std::map< LayerGuid, Layer *> untouched)
 
template<typename LayerType >
LayerType * FuseLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
 
template<typename LayerType >
LayerType * FuseAdditionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseSubtractionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseDivisionLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseMultiplicationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseBatchNormalizationLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseDepthwiseConvolution2dLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
LayerType * FuseFullyConnectedLayer (OptimizationViews &optimizationViews, LayerType *baseLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc, std::string name)
 
template<typename LayerType >
std::vector< IConnectableLayer * > ChainReduceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, ReduceDescriptor &desc)
 
template<typename LayerType >
void ReplaceLayers (OptimizationViews &optimizationViews, LayerType *baseLayer, std::vector< IConnectableLayer *> &layers)
 
arm_compute::NormalizationLayerInfo CreateAclNormalizationLayerInfoForL2Normalization (const armnn::TensorInfo &tensorInfo, armnn::DataLayout dataLayout)
 
arm_compute::ActivationLayerInfo::ActivationFunction ConvertActivationFunctionToAclActivationFunction (ActivationFunction armnnFunction)
 
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor &actDesc)
 
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo (const ActivationDescriptor *activationDescPtr)
 
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo (const QueueDescriptor &queueDescriptor)
 
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo (uint32_t activationFunction)
 
arm_compute::ComparisonOperation ConvertComparisonOperationToAcl (const ComparisonDescriptor &descriptor)
 
arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType (PoolingAlgorithm poolingAlgorithm)
 
arm_compute::DimensionRoundingType ConvertOutputShapeRoundingToAclDimensionRoundingType (OutputShapeRounding rounding)
 
arm_compute::NormType ConvertNormalizationAlgorithmChannelToAclNormType (NormalizationAlgorithmChannel channelType)
 
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, const ActivationDescriptor *activationDesc)
 
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo (const FullyConnectedDescriptor &fullyConnectedDesc, arm_compute::ActivationLayerInfo activationLayerInfo)
 
arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy (ResizeMethod resizeMethod)
 
template<typename T >
T ComputeSoftmaxAclAxis (const SoftmaxDescriptor &softmaxDesc, const armnn::TensorInfo &tensor)
 
std::set< unsigned int > ComputeSplitAxis (const armnn::SplitterDescriptor &desc, const TensorShape &input)
 
int ComputeAclAxis (const int &armnnAxis, const armnn::TensorInfo &tensor)
 Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank) More...
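 A minimal sketch (the tensor shape is illustrative):
 
     armnn::TensorInfo info(armnn::TensorShape({1, 3, 224, 224}), armnn::DataType::Float32);
     // For a rank-4 tensor, ArmNN axis 1 (counted from the left) corresponds to ACL axis 2 (counted from the right).
     int aclAxis = armnn::ComputeAclAxis(1, info);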
 
unsigned int ComputePositiveAxis (const int &axis, const armnn::TensorInfo &tensor)
 Function to convert axis to its positive equivalent value. More...
 
arm_compute::Conv3dInfo ComputeConv3DInfo (const armnn::Convolution3dDescriptor descriptor, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 Utility function used to set up an arm_compute::Conv3dInfo object from a Convolution3d descriptor. More...
 
arm_compute::Conv3dInfo ComputeConv3DInfo (const armnn::Convolution3dQueueDescriptor queueDescriptor, bool isFastMathEnabled)
 
arm_compute::PaddingMode ConvertPaddingModeToAcl (const PaddingMode &paddingMode)
 
arm_compute::ReductionOperation ConvertReductionOperationToAcl (const ReduceDescriptor &descriptor)
 
const TensorInfo ComputeReductionTensorShape (const armnn::TensorInfo &input, const std::vector< uint32_t > &vAxis, const bool keepDims)
 Function to compute the output tensor shape based on the reduction axes and whether keepDims is set. More...
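 A minimal sketch (the input shape and axes are illustrative):
 
     armnn::TensorInfo input(armnn::TensorShape({2, 3, 4}), armnn::DataType::Float32);
     // Reduce over axis 1 with keepDims = false; the reduced dimension is expected to be dropped, giving {2, 4}.
     armnn::TensorInfo reduced =
         armnn::ComputeReductionTensorShape(input, std::vector<uint32_t>{1}, false);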
 
armnn::Optional< armnn::DataType > GetBiasTypeFromWeightsType (armnn::Optional< armnn::DataType > weightsType)
 
template<typename F >
bool CheckSupportRule (F rule, Optional< std::string &> reasonIfUnsupported, const char *reason)
 
template<typename T >
bool AllTypesAreEqualImpl (T)
 
template<typename T , typename... Rest>
bool AllTypesAreEqualImpl (T t1, T t2, Rest... rest)
 
std::unique_ptr< IMemoryOptimizerStrategy > GetMemoryOptimizerStrategy (const std::string &strategyName)
 
const std::vector< std::string > GetMemoryOptimizerStrategyNames ()
 
 TEST_SUITE ("MemoryManagerTests")
 
constexpr const char * MockImportBackendId ()
 
constexpr const char * MockBackendId ()
 
armnn::ConstTensor PermuteTensor (const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
 
void ReshapeWeightsForAcl (TensorInfo &weightInfo, DataLayout dataLayout)
 
template<typename DataType >
ConstTensor ReorderWeightChannelsForAcl (const ConstTensor &weightHandle, DataLayout dataLayout, void *permuteBuffer)
 
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl (const TensorInfo &weightInfo, DataLayout dataLayout)
 
std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout dataLayout, void *permuteBuffer)
 Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a ConstCpuTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. More...
 
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl (const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
 Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. Returns a tuple of the converted weights tensor info and the depth multiplier. More...
 
std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW (const ConstTensorHandle *weightTensor, const TensorInfo &inputInfo, const DataLayout &dataLayout, void *permuteBuffer)
 Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W]. More...
 
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl (const ConstTensorHandle *weightTensor, DataLayout dataLayout, void *permuteBuffer)
 
int32_t ConvertMaskToACLFormat (int32_t mask, int32_t numDim)
 
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices (TensorInfo inputInfo0, TensorInfo inputInfo1)
 Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1) More...
 
template<typename CopyFunc >
void CopyTensorContentsGeneric (const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)
 
template<typename SrcTensorHandleType , typename DstTensorHandleType , typename DescriptorType >
void GatherTensorHandlePairs (const DescriptorType &descriptor, std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType *>> &tensorHandlePairs)
 
std::string LowerString (std::string value)
 
TuningLevel ParseTuningLevel (const BackendOptions::Var &value, TuningLevel defaultValue)
 
bool ParseBoolean (const BackendOptions::Var &value, bool defaultValue)
 
std::string ParseFile (const BackendOptions::Var &value, std::string defaultValue)
 
void ConfigureTuner (arm_compute::CLTuner &tuner, TuningLevel level)
 
constexpr const char * ClBackendId ()
 
flatbuffers::Offset< ClContext > CreateClContext (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>> programs=0)
 
flatbuffers::Offset< ClContext > CreateClContextDirect (flatbuffers::FlatBufferBuilder &_fbb, const std::vector< flatbuffers::Offset< armnn::Program >> *programs=nullptr)
 
flatbuffers::Offset< Program > CreateProgram (flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::String > name=0, flatbuffers::Offset< flatbuffers::Vector< uint8_t >> binary=0)
 
flatbuffers::Offset< Program > CreateProgramDirect (flatbuffers::FlatBufferBuilder &_fbb, const char *name=nullptr, const std::vector< uint8_t > *binary=nullptr)
 
const armnn::ClContext * GetClContext (const void *buf)
 
const armnn::ClContext * GetSizePrefixedClContext (const void *buf)
 
const char * ClContextIdentifier ()
 
bool ClContextBufferHasIdentifier (const void *buf)
 
bool VerifyClContextBuffer (flatbuffers::Verifier &verifier)
 
bool VerifySizePrefixedClContextBuffer (flatbuffers::Verifier &verifier)
 
const char * ClContextExtension ()
 
void FinishClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)
 
void FinishSizePrefixedClContextBuffer (flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset< armnn::ClContext > root)
 
constexpr const char * ClImportTensorHandleFactoryId ()
 
constexpr const char * ClTensorHandleFactoryId ()
 
arm_compute::Status ClAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)
 
arm_compute::Status ClAdditionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)
 
arm_compute::Status ClBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)
 
arm_compute::Status ClCastValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)
 
arm_compute::Status ClComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)
 
arm_compute::Status ClConcatWorkloadValidate (const std::vector< const TensorInfo *> &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)
 
arm_compute::Status ClConstantWorkloadValidate (const TensorInfo &output)
 
arm_compute::Status ClConvertFp16ToFp32WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClConvertFp32ToFp16WorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)
 
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClFloorWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)
 
arm_compute::Status ClGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)
 
arm_compute::Status ClInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)
 
arm_compute::Status ClL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)
 
arm_compute::Status ClLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)
 
arm_compute::Status ClLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClMeanValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)
 
arm_compute::Status ClMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status ClMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)
 
arm_compute::Status ClPadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)
 
arm_compute::Status ClPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)
 
arm_compute::Status ClPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status ClPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)
 
arm_compute::Status ClPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)
 
arm_compute::Status ClQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &previousCellStateIn, const TensorInfo &previousOutputIn, const TensorInfo &cellStateOut, const TensorInfo &output, const QuantizedLstmInputParamsInfo &paramsInfo)
 
arm_compute::Status ClQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
 
arm_compute::Status ClReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)
 
arm_compute::Status ClRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)
 
arm_compute::Status ClSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status ClSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)
 
arm_compute::Status ClSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)
 
arm_compute::Status ClSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)
 
arm_compute::Status ClSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status ClStackWorkloadValidate (const std::vector< const TensorInfo *> &inputs, const TensorInfo &output, const StackDescriptor &descriptor)
 
arm_compute::Status ClStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)
 
arm_compute::Status ClSubtractionValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status ClTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status ClTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)
 
arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &output, const Optional< TensorInfo > &hiddenStateOutput, const Optional< TensorInfo > &cellStateOutput, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
std::string GetConvolutionMethodString (arm_compute::ConvolutionMethod &convolutionMethod)
 
template<typename T >
void CopyArmComputeClTensorData (arm_compute::CLTensor &dstTensor, const T *srcData)
 
auto SetClStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
 
auto SetClSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
 
void InitializeArmComputeClTensorData (arm_compute::CLTensor &clTensor, const ConstTensorHandle *handle)
 
RuntimeException WrapClError (const cl::Error &clError, const CheckLocation &location)
 
void RunClFunction (arm_compute::IFunction &function, const CheckLocation &location)
 
template<typename DataType , typename PayloadType >
DataType * GetOutputTensorData (unsigned int idx, const PayloadType &data)
 
constexpr const char * NeonBackendId ()
 
constexpr const char * NeonTensorHandleFactoryId ()
 
arm_compute::Status NeonAbsWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonActivationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ActivationDescriptor &descriptor)
 
arm_compute::Status NeonAdditionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonArgMinMaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ArgMinMaxDescriptor &descriptor)
 
arm_compute::Status NeonBatchNormalizationValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonBatchToSpaceNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const BatchToSpaceNdDescriptor &descriptor)
 
arm_compute::Status NeonCastValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonChannelShuffleValidate (const TensorInfo &input, const TensorInfo &output, const ChannelShuffleDescriptor &descriptor)
 
arm_compute::Status NeonComparisonWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ComparisonDescriptor &descriptor)
 
arm_compute::Status NeonConcatWorkloadValidate (const std::vector< const TensorInfo *> &inputs, const TensorInfo &output, const OriginsDescriptor &descriptor)
 
arm_compute::Status NeonConstantWorkloadValidate (const TensorInfo &output)
 
arm_compute::Status NeonConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonConvolution3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Convolution3dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonDepthToSpaceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthToSpaceDescriptor &descriptor)
 
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonDequantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::DetectionPostProcessLayerInfo MakeInfo (const DetectionPostProcessDescriptor &descriptor)
 
arm_compute::Status NeonDetectionPostProcessValidate (const TensorInfo &boxEncodings, const TensorInfo &scores, const TensorInfo &anchors, const TensorInfo &detectionBoxes, const TensorInfo &detectionClasses, const TensorInfo &detectionScores, const TensorInfo &numDetections, const DetectionPostProcessDescriptor &descriptor)
 
arm_compute::Status NeonDivisionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonExpWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonFullyConnectedWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonGatherNdWorkloadValidate (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo)
 
arm_compute::Status NeonGatherWorkloadValidate (const TensorInfo &input, const TensorInfo &indices, const TensorInfo &output, const GatherDescriptor &descriptor)
 
arm_compute::Status NeonInstanceNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const InstanceNormalizationDescriptor &descriptor)
 
arm_compute::Status NeonL2NormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const L2NormalizationDescriptor &descriptor)
 
arm_compute::Status NeonLogicalAndWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonLogicalNotWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonLogicalOrWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonLogSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const LogSoftmaxDescriptor &descriptor)
 
arm_compute::Status NeonLogWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &scratchBuffer, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const LstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonMaximumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 
arm_compute::Status NeonMeanWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const MeanDescriptor &descriptor)
 
arm_compute::Status NeonMinimumWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output)
 Validate function for validating the inputs and output. More...
 
arm_compute::Status NeonMultiplicationWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonNegWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonNormalizationWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const NormalizationDescriptor &descriptor)
 
arm_compute::Status NeonPadWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PadDescriptor &descriptor)
 
arm_compute::Status NeonPermuteWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const PermuteDescriptor &descriptor)
 
arm_compute::Status NeonPooling2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling2dDescriptor &descriptor)
 
arm_compute::Status NeonPooling3dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const Pooling3dDescriptor &descriptor)
 
arm_compute::Status NeonPreluWorkloadValidate (const TensorInfo &input, const TensorInfo &alpha, const TensorInfo &output)
 
arm_compute::Status NeonQLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const TensorInfo &output, const QLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonQuantizedLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &cellStateIn, const TensorInfo &outputStateIn, const TensorInfo &cellStateOut, const TensorInfo &outputStateOut, const QuantizedLstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonQuantizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonReduceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
 
arm_compute::Status NeonReshapeWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonResizeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const ResizeDescriptor &descriptor)
 
arm_compute::Status NeonRsqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonSinWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SliceDescriptor &descriptor)
 
arm_compute::Status NeonSoftmaxWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SoftmaxDescriptor &descriptor)
 
arm_compute::Status NeonSpaceToBatchNdWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToBatchNdDescriptor &descriptor)
 
arm_compute::Status NeonSpaceToDepthWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const SpaceToDepthDescriptor &descriptor)
 
arm_compute::Status NeonSplitterWorkloadValidate (const TensorInfo &input, const std::vector< std::reference_wrapper< TensorInfo >> &outputs, unsigned int splitAxis)
 
arm_compute::Status NeonSqrtWorkloadValidate (const TensorInfo &input, const TensorInfo &output)
 
arm_compute::Status NeonStackWorkloadValidate (const std::vector< const TensorInfo *> &inputs, const TensorInfo &output, const StackDescriptor &descriptor)
 
arm_compute::Status NeonStridedSliceWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const StridedSliceDescriptor &descriptor)
 
arm_compute::Status NeonSubtractionWorkloadValidate (const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
 
arm_compute::Status NeonTransposeConvolution2dWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases)
 
arm_compute::Status NeonTransposeWorkloadValidate (const TensorInfo &input, const TensorInfo &output, const TransposeDescriptor &descriptor)
 
arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
arm_compute::Status NeonUnidirectionalSequenceLstmWorkloadValidate (const TensorInfo &input, const TensorInfo &outputStateIn, const TensorInfo &cellStateIn, const TensorInfo &outputStateOut, const TensorInfo &cellStateOut, const TensorInfo &output, const UnidirectionalSequenceLstmDescriptor &descriptor, const LstmInputParamsInfo &paramsInfo)
 
template<typename T >
void CopyArmComputeTensorData (arm_compute::Tensor &dstTensor, const T *srcData)
 
void InitializeArmComputeTensorData (arm_compute::Tensor &tensor, const ConstTensorHandle *handle)
 
auto SetNeonStridedSliceData (const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)
 
auto SetNeonSliceData (const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)
 
constexpr const char * RefBackendId ()
 
constexpr const char * RefTensorHandleFactoryId ()
 
template<DataType ArmnnType>
bool IsDataType (const WorkloadInfo &info)
 
bool IsSigned32 (const WorkloadInfo &info)
 
bool IsBFloat16 (const WorkloadInfo &info)
 
bool IsFloat16 (const WorkloadInfo &info)
 
bool IsQSymmS16 (const WorkloadInfo &info)
 
bool IsQSymmS8 (const WorkloadInfo &info)
 
bool IsQAsymmS8 (const WorkloadInfo &info)
 
bool IsQAsymmU8 (const WorkloadInfo &info)
 
template<typename QueueDescriptorType >
constexpr bool IsOperationQueueDescriptor (const QueueDescriptorType &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const MemCopyQueueDescriptor &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const ConstantQueueDescriptor &)
 
template<>
constexpr bool IsOperationQueueDescriptor (const PermuteQueueDescriptor &)
 
float Activation (float in, ActivationFunction function, float a, float b)
 
void Activation (Decoder< float > &in, Encoder< float > &out, const TensorInfo &tensorInfo, ActivationFunction function, float a, float b)
 
template<typename OUT >
void ArgMinMax (Decoder< float > &in, OUT *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
template void ArgMinMax (Decoder< float > &in, int32_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
template void ArgMinMax (Decoder< float > &in, int64_t *out, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, ArgMinMaxFunction function, int axis)
 
void BatchNormImpl (const BatchNormalizationQueueDescriptor &data, Decoder< float > &meanDecoder, Decoder< float > &varianceDecoder, Decoder< float > &betaDecoder, Decoder< float > &gammaDecoder, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
unsigned int Offset (const TensorShape &shape, unsigned int batch, unsigned int height, unsigned int width, unsigned int channels, const DataLayoutIndexed &dataLayout)
 
void BatchToSpaceNd (const DataLayoutIndexed &dataLayout, const TensorInfo &inputTensorInfo, const TensorInfo &outputTensorInfo, const std::vector< unsigned int > &blockShape, const std::vector< std::pair< unsigned int, unsigned int >> &cropsData, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
void Concatenate (const ConcatQueueDescriptor &data, std::vector< ITensorHandle *> inputs, std::vector< ITensorHandle *> outputs)
 
void Convolve3d (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int paddingFront, unsigned int xStride, unsigned int yStride, unsigned int zStride, unsigned int xDilation, unsigned int yDilation, unsigned int zDilation)
 
void Convolve (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)
 
template<typename T >
void Debug (const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< BFloat16 > (const TensorInfo &inputInfo, const BFloat16 *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< Half > (const TensorInfo &inputInfo, const Half *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< float > (const TensorInfo &inputInfo, const float *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< uint8_t > (const TensorInfo &inputInfo, const uint8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< int8_t > (const TensorInfo &inputInfo, const int8_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< int16_t > (const TensorInfo &inputInfo, const int16_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template void Debug< int32_t > (const TensorInfo &inputInfo, const int32_t *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
 
template<typename T >
std::unique_ptr< Decoder< T > > MakeDecoder (const TensorInfo &info, const void *data=nullptr)
 
template<>
std::unique_ptr< Decoder< float > > MakeDecoder (const TensorInfo &info, const void *data)
 
template<>
std::unique_ptr< Decoder< bool > > MakeDecoder (const TensorInfo &info, const void *data)
 
template<>
std::unique_ptr< Decoder< int32_t > > MakeDecoder (const TensorInfo &info, const void *data)
 
void DepthToSpace (const TensorInfo &inputInfo, const DepthToSpaceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void Dequantize (Decoder< float > &inputDecoder, Encoder< float > &outputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
 
std::vector< unsigned int > GenerateRangeK (unsigned int k)
 
void TopKSort (unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
 
float IntersectionOverUnion (const float *boxI, const float *boxJ)
 
std::vector< unsigned int > NonMaxSuppression (unsigned int numBoxes, const std::vector< float > &boxCorners, const std::vector< float > &scores, float nmsScoreThreshold, unsigned int maxDetection, float nmsIouThreshold)
 
void AllocateOutputData (unsigned int numOutput, unsigned int numSelected, const std::vector< float > &boxCorners, const std::vector< unsigned int > &outputIndices, const std::vector< unsigned int > &selectedBoxes, const std::vector< unsigned int > &selectedClasses, const std::vector< float > &selectedScores, float *detectionBoxes, float *detectionScores, float *detectionClasses, float *numDetections)
 
void DetectionPostProcess (const TensorInfo &boxEncodingsInfo, const TensorInfo &scoresInfo, const TensorInfo &anchorsInfo, const TensorInfo &detectionBoxesInfo, const TensorInfo &detectionClassesInfo, const TensorInfo &detectionScoresInfo, const TensorInfo &numDetectionsInfo, const DetectionPostProcessDescriptor &desc, Decoder< float > &boxEncodings, Decoder< float > &scores, Decoder< float > &anchors, float *detectionBoxes, float *detectionClasses, float *detectionScores, float *numDetections)
 
template<typename T >
std::unique_ptr< Encoder< T > > MakeEncoder (const TensorInfo &info, void *data=nullptr)
 
template<>
std::unique_ptr< Encoder< float > > MakeEncoder (const TensorInfo &info, void *data)
 
template<>
std::unique_ptr< Encoder< bool > > MakeEncoder (const TensorInfo &info, void *data)
 
template<>
std::unique_ptr< Encoder< int32_t > > MakeEncoder (const TensorInfo &info, void *data)
 
void Fill (Encoder< float > &output, const TensorShape &desiredOutputShape, const float value)
 Creates a tensor and fills it with a scalar value. More...
 
void FullyConnected (const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *rBiasDecoder, bool biasEnabled, unsigned int K, bool transposeWeights)
 Performs a matrix multiplication and optionally adds a bias. More...
 
void Gather (const TensorInfo &paramsInfo, const TensorInfo &indicesInfo, const TensorInfo &outputInfo, Decoder< float > &params, const int32_t *indices, Encoder< float > &output, const int32_t axis)
 
void InstanceNorm (const InstanceNormalizationQueueDescriptor &data, const TensorInfo &inputInfo, Decoder< float > &inputDecoder, Encoder< float > &outputEncoder)
 
void LogSoftmax (Decoder< float > &input, Encoder< float > &output, const TensorInfo &inputInfo, const LogSoftmaxDescriptor &descriptor)
 
void LstmImpl (const LstmDescriptor &descriptor, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const TensorShape &inputToOutputWeightsShape, const TensorShape &recurrentToOutputWeightsShape, std::unique_ptr< Decoder< float >> &inputData, std::unique_ptr< Decoder< float >> &outputStateIn, std::unique_ptr< Decoder< float >> &cellStateIn, std::unique_ptr< Encoder< float >> &outputStateOut, std::unique_ptr< Encoder< float >> &cellStateOut, std::unique_ptr< Encoder< float >> &output, std::unique_ptr< Decoder< float >> &cellStateOutDecoder, std::unique_ptr< Decoder< float >> &outputDecoder, std::unique_ptr< Decoder< float >> &inputToInputWeightsTensor, std::unique_ptr< Decoder< float >> &inputToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &inputToCellWeightsTensor, std::unique_ptr< Decoder< float >> &inputToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToInputWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToCellWeightsTensor, std::unique_ptr< Decoder< float >> &recurrentToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToInputWeightsTensor, std::unique_ptr< Decoder< float >> &cellToForgetWeightsTensor, std::unique_ptr< Decoder< float >> &cellToOutputWeightsTensor, std::unique_ptr< Decoder< float >> &inputGateBiasTensor, std::unique_ptr< Decoder< float >> &forgetGateBiasTensor, std::unique_ptr< Decoder< float >> &cellBiasTensor, std::unique_ptr< Decoder< float >> &outputGateBiasTensor, std::unique_ptr< Decoder< float >> &projectionWeightsTensor, std::unique_ptr< Decoder< float >> &projectionBiasTensor, std::unique_ptr< Decoder< float >> &inputLayerNormWeights, std::unique_ptr< Decoder< float >> &forgetLayerNormWeights, std::unique_ptr< Decoder< float >> &cellLayerNormWeights, std::unique_ptr< Decoder< float >> &outputLayerNormWeights, std::unique_ptr< Encoder< float >> &inputGateScratch, std::unique_ptr< Encoder< float >> &cellScratch, std::unique_ptr< Encoder< float >> &forgetGateScratch, std::unique_ptr< Encoder< float >> &outputGateScratch, std::unique_ptr< Decoder< float >> &inputGateScratchDecoder, std::unique_ptr< Decoder< float >> &cellScratchDecoder, std::unique_ptr< Decoder< float >> &forgetGateScratchDecoder, std::unique_ptr< Decoder< float >> &outputGateScratchDecoder, float layerNormEpsilon)
 
void MirrorPad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
 
void Pad (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const ITensorHandle *inputHandle, ITensorHandle *outputHandle, const PadQueueDescriptor &data)
 
void Pooling2d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling2dDescriptor &params)
 Computes the Pooling2d operation. More...
 
void Pooling3d (Decoder< float > &rInputDecoder, Encoder< float > &rOutputEncoder, const TensorInfo &inputInfo, const TensorInfo &outputInfo, const Pooling3dDescriptor &params)
 Computes the Pooling3d operation. More...
 
void PreluImpl (const TensorInfo &inputInfo, const TensorInfo &alphaInfo, const TensorInfo &outputInfo, Decoder< float > &inputData, Decoder< float > &alphaData, Encoder< float > &outputData)
 
bool NextIndex (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &current)
 
unsigned int ReducedOutputOffset (const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &index, const unsigned int numAxis, const std::vector< unsigned int > &axis)
 
void Reduce (const TensorInfo &inputInfo, const TensorInfo &outputInfo, Decoder< float > &input, Encoder< float > &output, const std::vector< uint32_t > axis, const ReduceOperation reduceOperation)
 
void FakeQuantization (const float *inputData, float *outputData, uint32_t numElements, float min, float max)
 
unsigned int GetNumActivations (const TensorInfo &inputInfo)
 
const TensorInfo & GetTensorInfo (const ITensorHandle *tensorHandle)
 float32 helpers More...
 
template<typename DataType , typename PayloadType >
const DataType * GetInputTensorData (unsigned int idx, const PayloadType &data)
 
template<typename DataType >
DataType * GetOutputTensorData (ITensorHandle *tensorHandle)
 
template<typename PayloadType >
const float * GetInputTensorDataFloat (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
float * GetOutputTensorDataFloat (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
const Half * GetInputTensorDataHalf (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
Half * GetOutputTensorDataHalf (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
const BFloat16 * GetInputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)
 
template<typename PayloadType >
BFloat16 * GetOutputTensorDataBFloat16 (unsigned int idx, const PayloadType &data)
 
template<typename T >
std::vector< float > Dequantize (const T *quant, const TensorInfo &info)
 u8 helpers More...
 
template<typename T >
void Dequantize (const T *inputData, float *outputData, const TensorInfo &info)
 
void Quantize (uint8_t *quant, const float *dequant, const TensorInfo &info)
 
void Resize (Decoder< float > &in, const TensorInfo &inputInfo, Encoder< float > &out, const TensorInfo &outputInfo, DataLayoutIndexed dataLayout, armnn::ResizeMethod resizeMethod, bool alignCorners, bool halfPixelCenters)
 
void Slice (const TensorInfo &inputInfo, const SliceDescriptor &descriptor, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void Softmax (Decoder< float > &in, Encoder< float > &out, const TensorInfo &inputTensorInfo, float beta, int axis)
 Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. More...
 
unsigned int GetOffset (const TensorShape &shape, unsigned int b, unsigned int h, unsigned int w, unsigned int c, const DataLayoutIndexed &dataLayout)
 
void SpaceToBatchNd (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToBatchNdDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void SpaceToDepth (const TensorInfo &inputInfo, const TensorInfo &outputInfo, const SpaceToDepthDescriptor &params, Decoder< float > &inputData, Encoder< float > &outputData)
 
void Split (const SplitterQueueDescriptor &data, std::vector< ITensorHandle *> inputs, std::vector< ITensorHandle *> outputs)
 
template<typename DataType >
void Splitter (const SplitterQueueDescriptor &data, std::vector< ITensorHandle *> inputs, std::vector< ITensorHandle *> outputs)
 
void Stack (const StackQueueDescriptor &data, std::vector< std::unique_ptr< Decoder< float >>> &inputs, Encoder< float > &output, const TensorInfo &inputInfo, const TensorInfo &outputInfo)
 
void StridedSlice (const TensorInfo &inputInfo, const StridedSliceDescriptor &params, const void *inputData, void *outputData, unsigned int dataTypeSize)
 
void TransposeConvolution2dImpl (const TransposeConvolution2dDescriptor &descriptor, const TensorShape &inputShape, Decoder< float > &inputDecoder, const TensorShape &outputShape, Encoder< float > &outputEncoder, const TensorShape &weightsShape, Decoder< float > &weightsDecoder, Decoder< float > *biasesDecoder)
 
std::istream & operator>> (std::istream &in, armnn::Compute &compute)
 
std::istream & operator>> (std::istream &in, armnn::BackendId &backend)
 

Variables

constexpr unsigned int MaxNumOfTensorDimensions = 5U
 
constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u
 The lowest performance data capture interval we support is 10 milliseconds. More...
 
constexpr unsigned int EXPIRE_RATE = 3U
 Variable to control expire rate of priority queue. More...
 
constexpr std::size_t g_ProfilingEventCountHint = 1024
 
constexpr bool g_WriteProfilingEventSequence = true
 
constexpr bool g_AggregateProfilingEventsByInference = true
 
constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false
 
thread_local IProfiler * tl_Profiler = nullptr
 
constexpr size_t wordSize = sizeof(size_t) * 8
 
const BackendCapabilities gpuAccCapabilities ("GpuAcc", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", true}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })
 
const BackendCapabilities cpuAccCapabilities ("CpuAcc", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })
 
const std::set< armnn::LayerType > paddingRequiredLayers
 
const BackendCapabilities cpuRefCapabilities ("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })
 
const std::set< armnn::BackendCapability > oldCpuRefCapabilities
 

Detailed Description

Copyright (c) 2021 ARM Limited and Contributors.

Optional is a drop-in replacement for std::optional until we migrate to C++17.

All rights reserved.

SPDX-License-Identifier: MIT

Only a subset of the optional features are implemented that we intend to use in ArmNN. There are two distinct implementations here:

  1. for normal constructable/destructable types and reference types
  2. for reference types

The std::optional features we support are:

  • has_value() and operator bool() to tell if the optional has a value
  • value() returns a reference to the held object
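
A minimal sketch of that subset in use (the helper ScaleOr is hypothetical; only has_value()/value() from the list above are assumed):

#include <armnn/Optional.hpp>

// Returns the held value when one is present, otherwise a caller-supplied fallback.
float ScaleOr(const armnn::Optional<float>& scale, float fallback)
{
    return scale.has_value() ? scale.value() : fallback;
}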

Typedef Documentation

◆ ACLMemManagerOnDemand

using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>

Definition at line 22 of file NeonFullyConnectedWorkload.cpp.

◆ AdditionalInfoObjectPtr

using AdditionalInfoObjectPtr = std::shared_ptr<void>

Definition at line 213 of file Layer.hpp.

◆ BackendCapabilities

using BackendCapabilities = BackendOptions

Definition at line 19 of file BackendOptions.hpp.

◆ BackendIdSet

using BackendIdSet = std::unordered_set<BackendId>

Definition at line 193 of file BackendId.hpp.

◆ BackendIdVector

using BackendIdVector = std::vector<BackendId>

Definition at line 192 of file BackendId.hpp.

◆ BackendsMap

using BackendsMap = std::map<BackendId, std::unique_ptr<class IBackendInternal> >

Definition at line 294 of file Network.hpp.

◆ BaseFloat32ComparisonWorkload

◆ BaseUint8ComparisonWorkload

◆ BFloat16ToFloat32Workload

◆ BindingPointInfo

Definition at line 274 of file Tensor.hpp.

◆ BooleanWorkload

◆ CompiledBlobDeleter

typedef std::function< void(const void *)> CompiledBlobDeleter

Definition at line 244 of file INetwork.hpp.

◆ CompiledBlobPtr

typedef std::unique_ptr< void, CompiledBlobDeleter > CompiledBlobPtr

Definition at line 245 of file INetwork.hpp.

◆ ConcatDescriptor

Definition at line 55 of file DescriptorsFwd.hpp.

◆ Coordinates

using Coordinates = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 15 of file InternalTypes.hpp.

◆ CopyAndImportFactoryPairs

◆ DebugCallbackFunction

using DebugCallbackFunction = std::function<void(LayerGuid guid, unsigned int slotIndex, ITensorHandle* tensorHandle)>

Define the type of callback for the Debug layer to call.

Parameters
guid - guid of layer connected to the input of the Debug layer
slotIndex - index of the output slot connected to the input of the Debug layer
tensorHandle - TensorHandle for the input tensor to the Debug layer

Definition at line 379 of file Types.hpp.
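
A minimal sketch of a callback matching this type (MakeLoggingCallback is a hypothetical helper; only the signature documented above is assumed):

#include <armnn/Types.hpp>
#include <iostream>

// Builds a DebugCallbackFunction that logs which output slot the Debug layer observed.
armnn::DebugCallbackFunction MakeLoggingCallback()
{
    return [](armnn::LayerGuid /*guid*/, unsigned int slotIndex, armnn::ITensorHandle* /*tensorHandle*/)
    {
        std::cout << "Debug layer fired for output slot " << slotIndex << std::endl;
    };
}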

◆ DepthToSpaceDescriptor

A DepthToSpaceDescriptor for the DepthToSpaceLayer.

Definition at line 1080 of file Descriptors.hpp.

◆ Dimensions

using Dimensions = std::array<unsigned int, MaxNumOfTensorDimensions>

Definition at line 16 of file InternalTypes.hpp.

◆ DynamicBackendPtr

using DynamicBackendPtr = std::unique_ptr<DynamicBackend>

Definition at line 52 of file DynamicBackend.hpp.

◆ FactoryId

◆ Float16ToFloat32Workload

◆ Float32ToBFloat16Workload

◆ Float32ToFloat16Workload

◆ Float32Workload

◆ FloatWorkload

◆ Half

using Half = half_float::half

Definition at line 18 of file Half.hpp.

◆ HighResolutionClock

using HighResolutionClock = std::chrono::high_resolution_clock::time_point

Define a timer and associated inference ID for recording execution times.

Definition at line 382 of file Types.hpp.

◆ IBackendContextUniquePtr

using IBackendContextUniquePtr = std::unique_ptr<IBackendContext>

Definition at line 34 of file IBackendContext.hpp.

◆ IBackendInternalUniquePtr

typedef std::unique_ptr< IBackendInternal > IBackendInternalUniquePtr

Definition at line 32 of file BackendRegistry.hpp.

◆ IBackendSharedPtr

using IBackendSharedPtr = std::shared_ptr<IBackend>

Definition at line 263 of file Types.hpp.

◆ IBackendUniquePtr

using IBackendUniquePtr = std::unique_ptr<IBackend, void(*)(IBackend* backend)>

Definition at line 264 of file Types.hpp.

◆ IGpuAccTunedParametersPtr

The following API is replaced by the backend options API.

Definition at line 295 of file IRuntime.hpp.

◆ IInitialiseProfilingService

using IInitialiseProfilingService = arm::pipe::IInitialiseProfilingService

Definition at line 28 of file Runtime.hpp.

◆ ILayerSupportSharedPtr

using ILayerSupportSharedPtr = std::shared_ptr<ILayerSupport>

Definition at line 572 of file ILayerSupport.hpp.

◆ IMemoryManagerUniquePtr

using IMemoryManagerUniquePtr = std::unique_ptr<IMemoryManager>

Definition at line 24 of file IMemoryManager.hpp.

◆ ImportedInputId

using ImportedInputId = unsigned int

Definition at line 291 of file Types.hpp.

◆ ImportedOutputId

using ImportedOutputId = unsigned int

Definition at line 292 of file Types.hpp.

◆ INetworkPtr

using INetworkPtr = std::unique_ptr<INetwork, void(*)(INetwork* network)>

Definition at line 241 of file INetwork.hpp.

◆ InferenceTimingPair

Definition at line 383 of file Types.hpp.

◆ InputQueueDescriptor

Definition at line 91 of file WorkloadData.hpp.

◆ InputTensors

using InputTensors = std::vector<std::pair<LayerBindingId, class ConstTensor> >

Definition at line 392 of file Tensor.hpp.

◆ instead

Definition at line 255 of file TensorHandle.hpp.

◆ Int32Workload

◆ IOptimizedNetworkPtr

using IOptimizedNetworkPtr = std::unique_ptr<IOptimizedNetwork, void(*)(IOptimizedNetwork* network)>

Definition at line 242 of file INetwork.hpp.

◆ IReportStructure

using IReportStructure = arm::pipe::IReportStructure

Definition at line 27 of file Runtime.hpp.

◆ IRuntimePtr

using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>

Definition at line 33 of file IRuntime.hpp.

◆ LayerBindingId

using LayerBindingId = int

Type of identifiers for bindable layers (inputs, outputs).

Definition at line 290 of file Types.hpp.

◆ LayerPriority

using LayerPriority = unsigned int

Definition at line 212 of file Layer.hpp.

◆ LayerTypeOf

using LayerTypeOf = typename LayerTypeOfImpl<Type>::Type

Definition at line 90 of file LayersFwd.hpp.

◆ LoadedNetworks

using LoadedNetworks = std::unordered_map<NetworkId, std::unique_ptr<LoadedNetwork> >

Definition at line 26 of file Runtime.hpp.

◆ LogSoftmaxDescriptor

A LogSoftmaxDescriptor for the LogSoftmaxLayer.

Definition at line 169 of file Descriptors.hpp.

◆ MemoryOptimizerStrategiesMapRef

using MemoryOptimizerStrategiesMapRef = std::unordered_map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy> >

Definition at line 33 of file BackendRegistry.hpp.

◆ MemorySourceFlags

using MemorySourceFlags = unsigned int

Definition at line 15 of file MemorySources.hpp.

◆ MergerDescriptor

MergerDescriptor is deprecated, use ConcatDescriptor instead.

Definition at line 59 of file DescriptorsFwd.hpp.

◆ MergerQueueDescriptor

Definition at line 149 of file WorkloadData.hpp.

◆ ModelOptions

using ModelOptions = std::vector<BackendOptions>

Definition at line 18 of file BackendOptions.hpp.

◆ NetworkId

typedef int NetworkId

Definition at line 27 of file IRuntime.hpp.

◆ NetworkImplPtr

using NetworkImplPtr = std::unique_ptr<NetworkImpl, void (*)(NetworkImpl* network)>

Definition at line 28 of file Network.hpp.

◆ NetworkOptions

using NetworkOptions = std::vector<BackendOptions>

Definition at line 16 of file BackendOptions.hpp.

◆ OutputQueueDescriptor

Definition at line 92 of file WorkloadData.hpp.

◆ OutputTensors

using OutputTensors = std::vector<std::pair<LayerBindingId, class Tensor> >

Definition at line 393 of file Tensor.hpp.

◆ ParameterStringifyFunction

using ParameterStringifyFunction = std::function<void(const std::string& name, const std::string& value)>

Definition at line 14 of file SerializeLayerParameters.hpp.

◆ PreCompiledObjectDeleter

using PreCompiledObjectDeleter = std::function<void(const void*)>

Definition at line 19 of file PreCompiledLayer.hpp.

◆ PreCompiledObjectPtr

using PreCompiledObjectPtr = std::unique_ptr<void, PreCompiledObjectDeleter>

Definition at line 20 of file PreCompiledLayer.hpp.

◆ RefAdditionWorkload

◆ RefDebugBFloat16Workload

◆ RefDebugFloat16Workload

◆ RefDebugFloat32Workload

◆ RefDebugQAsymmS8Workload

◆ RefDebugQAsymmU8Workload

◆ RefDebugQSymmS16Workload

◆ RefDebugQSymmS8Workload

◆ RefDebugSigned32Workload

◆ RefDivisionWorkload

◆ RefMaximumWorkload

◆ RefMinimumWorkload

◆ RefMultiplicationWorkload

◆ RefPermuteBFloat16Workload

◆ RefPermuteFloat16Workload

◆ RefPermuteFloat32Workload

◆ RefPermuteQAsymm8Workload

◆ RefPermuteQAsymmS8Workload

◆ RefPermuteQSymm16Workload

◆ RefSubtractionWorkload

◆ RefTransposeBFloat16Workload

◆ RefTransposeFloat16Workload

◆ RefTransposeFloat32Workload

◆ RefTransposeQAsymm8Workload

◆ RefTransposeQAsymmS8Workload

◆ RefTransposeQSymm16Workload

◆ ResolveType

using ResolveType = typename ResolveTypeImpl<DT>::Type

Definition at line 79 of file ResolveType.hpp.

◆ SplitterDescriptor

Definition at line 60 of file DescriptorsFwd.hpp.

◆ TensorInfos

using TensorInfos = std::vector<TensorInfo>

Definition at line 151 of file BackendHelper.cpp.

◆ Uint8ToFloat32Workload

◆ Uint8Workload

◆ UnidirectionalSequenceLstmDescriptor

◆ WorkloadQueue

using WorkloadQueue = std::vector< std::unique_ptr<IWorkload> >

Definition at line 13 of file ExecutionFrame.hpp.

Enumeration Type Documentation

◆ ActivationFunction

enum ActivationFunction
strong
Enumerator
Sigmoid 
TanH 
Linear 
ReLu 
BoundedReLu 

min(a, max(b, input)); covers ReLu1 and ReLu6 (see the sketch after this enumeration).

SoftReLu 
LeakyReLu 
Abs 
Sqrt 
Square 
Elu 
HardSwish 

Definition at line 86 of file Types.hpp.
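
For example, ReLu6 can be expressed through BoundedReLu by filling an ActivationDescriptor (a sketch; the variable name relu6 is illustrative):

#include <armnn/Descriptors.hpp>

// output = min(6, max(0, input))
armnn::ActivationDescriptor relu6;
relu6.m_Function = armnn::ActivationFunction::BoundedReLu;
relu6.m_A = 6.0f;   // upper bound a
relu6.m_B = 0.0f;   // lower bound b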

◆ ArgMinMaxFunction

enum ArgMinMaxFunction
strong
Enumerator
Min 
Max 

Definition at line 102 of file Types.hpp.

◆ BackendCapability

enum BackendCapability : uint32_t
strong

BackendCapability class.

Enumerator
NonConstWeights 

Constant weights can be accessed through the descriptors; non-const weights, on the other hand, can be accessed through inputs.

AsyncExecution 

Asynchronous Execution.

Definition at line 267 of file Types.hpp.

enum class BackendCapability : uint32_t
{
    /// Constant weights can be accessed through the descriptors,
    /// On the other hand, non-const weights can be accessed through inputs.
    NonConstWeights,

    /// Asynchronous Execution.
    AsyncExecution,

    // add new enum values here
};

◆ BoostLogSeverityMapping

◆ CapabilityClass

enum CapabilityClass
strong

Capability class used by the GetCapabilities function so that only the capabilities within the given scope are chosen for calculation.

Enumerator
PaddingRequired 
FallbackImportDisabled 
CapabilityClassMax 

Definition at line 20 of file ITensorHandleFactory.hpp.

◆ ComparisonOperation

enum ComparisonOperation
strong
Enumerator
Equal 
Greater 
GreaterOrEqual 
Less 
LessOrEqual 
NotEqual 

Definition at line 108 of file Types.hpp.

◆ Compute

enum Compute
strong

The Compute enum is deprecated and is being replaced by BackendId (see the sketch at the end of this section).

Enumerator
Undefined 
CpuRef 

CPU Execution: Reference C++ kernels.

CpuAcc 

CPU Execution: NEON: ArmCompute.

GpuAcc 

GPU Execution: OpenCL: ArmCompute.

Definition at line 21 of file BackendId.hpp.

enum class Compute
{
    Undefined = 0,
    /// CPU Execution: Reference C++ kernels
    CpuRef = 1,
    /// CPU Execution: NEON: ArmCompute
    CpuAcc = 2,
    /// GPU Execution: OpenCL: ArmCompute
    GpuAcc = 3
};
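
Since Compute is deprecated, backends are normally named through BackendId instead; a small sketch (the implicit conversion from Compute is assumed to remain available for legacy code):

#include <armnn/BackendId.hpp>

armnn::BackendId cpuAcc("CpuAcc");                  // preferred: construct from the backend name
armnn::BackendId gpuAcc = armnn::Compute::GpuAcc;   // legacy: conversion from the deprecated enum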

◆ DataLayout

enum DataLayout
strong
Enumerator
NCHW 
NHWC 
NDHWC 
NCDHW 

Definition at line 62 of file Types.hpp.

◆ DataType

enum DataType
strong
Enumerator
Float16 
Float32 
QAsymmU8 
Signed32 
Boolean 
QSymmS16 
QSymmS8 
QAsymmS8 
BFloat16 
Signed64 

Definition at line 48 of file Types.hpp.

◆ Dimensionality

enum Dimensionality
strong
Enumerator
NotSpecified 
Specified 
Scalar 

Definition at line 158 of file Types.hpp.

◆ EdgeStrategy

enum EdgeStrategy
strong
Enumerator
Undefined 

No strategy has been defined. Used internally to verify integrity of optimizations.

DirectCompatibility 

Destination backend can work directly with tensors on source backend.

ExportToTarget 

Source backends tensor data can be exported to destination backend tensor without copy.

CopyToTarget 

Copy contents from source backend tensor to destination backend tensor.

Definition at line 100 of file ITensorHandleFactory.hpp.

enum class EdgeStrategy
{
    Undefined,             /// No strategy has been defined. Used internally to verify integrity of optimizations.
    DirectCompatibility,   /// Destination backend can work directly with tensors on source backend.
    ExportToTarget,        /// Source backends tensor data can be exported to destination backend tensor without copy.
    CopyToTarget           /// Copy contents from source backend tensor to destination backend tensor.
};

◆ GraphEvent

enum GraphEvent
strong
Enumerator
LayerAdded 
LayerErased 

Definition at line 12 of file IGraphObservable.hpp.

◆ JsonObjectType

enum JsonObjectType
strong
Enumerator
Measurement 
Event 
ExecObjectDesc 

Definition at line 20 of file JsonPrinter.hpp.

◆ LayerType

enum LayerType
strong

When adding a new layer, also adapt the LastLayer enum value in the enum class LayerType below.

Enumerator
Activation 
Addition 
ArgMinMax 
BatchNormalization 
BatchToSpaceNd 
Comparison 
Concat 
Constant 
ConvertBf16ToFp32 
ConvertFp16ToFp32 
ConvertFp32ToBf16 
ConvertFp32ToFp16 
Convolution2d 
Debug 
DepthToSpace 
DepthwiseConvolution2d 
Dequantize 
DetectionPostProcess 
Division 
ElementwiseUnary 
FakeQuantization 
Fill 
Floor 
FullyConnected 
Gather 
Input 
InstanceNormalization 
L2Normalization 
LogicalBinary 
LogSoftmax 
Lstm 
QLstm 
Map 
Maximum 
Mean 
MemCopy 
MemImport 
Merge 
Minimum 
Multiplication 
Normalization 
Output 
Pad 
Permute 
Pooling2d 
PreCompiled 
Prelu 
Quantize 
QuantizedLstm 
Reshape 
Rank 
Resize 
Reduce 
Slice 
Softmax 
SpaceToBatchNd 
SpaceToDepth 
Splitter 
Stack 
StandIn 
StridedSlice 
Subtraction 
Switch 
Transpose 
TransposeConvolution2d 
Unmap 
Cast 
Shape 
UnidirectionalSequenceLstm 
ChannelShuffle 
Convolution3d 
Pooling3d 
GatherNd 
FirstLayer 
LastLayer 

Definition at line 467 of file Types.hpp.

enum class LayerType
{
#define X(name) name,
    LIST_OF_LAYER_TYPE
#undef X
};

LIST_OF_LAYER_TYPE uses the X macro technique; it is defined at line 388 of Types.hpp.

◆ LogicalBinaryOperation

Enumerator
LogicalAnd 
LogicalOr 

Definition at line 118 of file Types.hpp.

◆ LogSeverity

enum LogSeverity
strong
Enumerator
Trace 
Debug 
Info 
Warning 
Error 
Fatal 

Definition at line 14 of file Utils.hpp.

enum class LogSeverity
{
    Trace,
    Debug,
    Info,
    Warning,
    Error,
    Fatal
};

◆ MemBlockStrategyType

enum MemBlockStrategyType
strong
Enumerator
SingleAxisPacking 
MultiAxisPacking 

Definition at line 239 of file Types.hpp.

enum class MemBlockStrategyType
{
    // MemBlocks can be packed on the Y axis only, overlap allowed on X axis.
    // In other words MemBlocks with overlapping lifetimes cannot use the same MemBin,
    // equivalent to blob or pooling memory management.
    SingleAxisPacking = 0,

    // MemBlocks can be packed on either Y or X axis but cannot overlap on both.
    // In other words MemBlocks with overlapping lifetimes can use the same MemBin,
    // equivalent to offset or slab memory management.
    MultiAxisPacking = 1
};

◆ MemorySource

enum MemorySource : uint32_t
strong

Define the Memory Source to reduce copies.

Enumerator
Undefined 
Malloc 
DmaBuf 
DmaBufProtected 
Gralloc 

Definition at line 230 of file Types.hpp.

◆ NormalizationAlgorithmChannel

Enumerator
Across 
Within 

Definition at line 193 of file Types.hpp.

◆ NormalizationAlgorithmMethod

Enumerator
LocalBrightness 

Krichevsky 2012: Local Brightness Normalization.

LocalContrast 

Jarret 2009: Local Contrast Normalization.

Definition at line 199 of file Types.hpp.

enum class NormalizationAlgorithmMethod
{
    /// Krichevsky 2012: Local Brightness Normalization
    LocalBrightness = 0,
    /// Jarret 2009: Local Contrast Normalization
    LocalContrast = 1
};

◆ OutputShapeRounding

enum OutputShapeRounding
strong
Enumerator
Floor 
Ceiling 

Definition at line 207 of file Types.hpp.

◆ PaddingMethod

enum PaddingMethod
strong

The padding method modifies the output of pooling layers.

In both supported methods, the values are ignored (they are not even zeroes, which would make a difference for max pooling a tensor with negative values). The difference between IgnoreValue and Exclude is that the former counts the padding fields in the divisor of Average and L2 pooling, while Exclude does not.
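
As a concrete illustration of the divisor difference, take a hypothetical 2x2 average-pooling window in which one column lies in the padding region and the real values are 4 and 6:

// IgnoreValue: padding fields are counted in the divisor -> (4 + 6) / 4 = 2.5
// Exclude:     padding fields are not counted            -> (4 + 6) / 2 = 5.0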

Enumerator
IgnoreValue 

The padding fields count, but are ignored.

Exclude 

The padding fields don't count and are ignored.

Definition at line 174 of file Types.hpp.

enum class PaddingMethod
{
    /// The padding fields count, but are ignored
    IgnoreValue = 0,
    /// The padding fields don't count and are ignored
    Exclude = 1
};

◆ PaddingMode

enum PaddingMode
strong

The padding mode controls whether the padding should be filled with constant values (Constant), or reflect the input, either including the border values (Symmetric) or not (Reflect).

Enumerator
Constant 
Reflect 
Symmetric 

Definition at line 186 of file Types.hpp.

◆ PoolingAlgorithm

enum PoolingAlgorithm
strong
Enumerator
Max 
Average 
L2 

Definition at line 136 of file Types.hpp.

◆ ProfilingDetailsMethod

Define the behaviour of the internal profiler when outputting network details.

Enumerator
Undefined 
DetailsWithEvents 
DetailsOnly 

Definition at line 71 of file Types.hpp.

◆ QosExecPriority

enum QosExecPriority
strong
Enumerator
Low 
Medium 
High 

Definition at line 79 of file Types.hpp.

◆ ReduceOperation

enum ReduceOperation
strong
Enumerator
Sum 
Max 
Mean 
Min 
Prod 

Definition at line 143 of file Types.hpp.

◆ ResizeMethod

enum ResizeMethod
strong
Enumerator
Bilinear 
NearestNeighbor 

Definition at line 152 of file Types.hpp.

◆ ShapeInferenceMethod

enum ShapeInferenceMethod
strong

The ShapeInferenceMethod modifies how the output shapes are treated.

When ValidateOnly is selected, the output shapes are inferred from the input parameters of the layer and any mismatch is reported. When InferAndValidate is selected, two actions are performed: (1) infer the output shape from the inputs and (2) validate the shapes as in ValidateOnly. This option has been added to work with tensors whose rank or dimension sizes are not specified explicitly, although this information can be calculated from the inputs.
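
A sketch of requesting inference during optimization, assuming the OptimizerOptions member spelling used here (m_shapeInferenceMethod):

#include <armnn/INetwork.hpp>

armnn::OptimizerOptions options;
options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::InferAndValidate;  // member name assumed
// options can then be passed to armnn::Optimize() together with the network and backend preferences.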

Enumerator
ValidateOnly 

Validate all output shapes.

InferAndValidate 

Infer missing output shapes and validate all output shapes.

Definition at line 221 of file Types.hpp.

enum class ShapeInferenceMethod
{
    /// Validate all output shapes
    ValidateOnly = 0,
    /// Infer missing output shapes and validate all output shapes
    InferAndValidate = 1
};

◆ Status

enum Status
strong

enumeration

Enumerator
Success 
Failure 

Definition at line 42 of file Types.hpp.

◆ TuningLevel

enum TuningLevel
strong
Enumerator
None 
Rapid 
Normal 
Exhaustive 

Definition at line 70 of file ClBackendContext.cpp.

◆ UnaryOperation

enum UnaryOperation
strong
Enumerator
Abs 
Exp 
Sqrt 
Rsqrt 
Neg 
LogicalNot 
Log 
Sin 

Definition at line 124 of file Types.hpp.

Function Documentation

◆ Activation() [1/2]

float Activation ( float  in,
ActivationFunction  function,
float  a,
float  b 
)

Definition at line 13 of file Activation.cpp.

References Abs, BoundedReLu, Elu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by Activation(), LstmImpl(), and TEST_SUITE().

17 {
18  float output;
19 
20  // Compute the result of the activation function.
21  switch (function)
22  {
23  case ActivationFunction::Linear:
24  {
25  output = a * in + b;
26  break;
27  }
28  case ActivationFunction::Sigmoid:
29  {
30  output = 1.f / (1.f + expf(-in));
31  break;
32  }
33  case ActivationFunction::ReLu:
34  {
35  output = std::max(0.f, in);
36  break;
37  }
38  case ActivationFunction::BoundedReLu:
39  {
40  output = std::min(a, std::max(b, in));
41  break;
42  }
43  case ActivationFunction::SoftReLu:
44  {
45  output = logf(1.0f + expf(in));
46  break;
47  }
48  case ActivationFunction::LeakyReLu:
49  {
50  output = in > 0.0f ? in : (in * a);
51  break;
52  }
53  case ActivationFunction::Abs:
54  {
55  output = in < 0 ? -in : in;
56  break;
57  }
58  case ActivationFunction::Sqrt:
59  {
60  output = sqrtf(in);
61  break;
62  }
63  case ActivationFunction::Square:
64  {
65  output = in * in;
66  break;
67  }
68  case ActivationFunction::TanH:
69  {
70  output = a * tanhf(b * in);
71  break;
72  }
73  case ActivationFunction::Elu:
74  {
75  output = (in >= 0) ? in : a * (expf(in) - 1);
76  break;
77  }
78  case ActivationFunction::HardSwish:
79  {
80  // hard_swish(x) = x * relu6(x+3) / 6
81  // relu6(x) = min(max(x,0),6)
82  output = in * (std::min(std::max((in + 3),0.0f),6.0f)) / 6;
83  break;
84  }
85  default:
86  {
87  throw InvalidArgumentException("Unsupported activation function");
88  }
89  }
90 
91  return output;
92 }

◆ Activation() [2/2]

void Activation ( Decoder< float > &  in,
Encoder< float > &  out,
const TensorInfo &  tensorInfo,
ActivationFunction  function,
float  a,
float  b 
)

Definition at line 95 of file Activation.cpp.

References Activation(), Decoder< IType >::Get(), TensorInfo::GetNumElements(), and Encoder< IType >::Set().

101 {
102  unsigned int numElements = tensorInfo.GetNumElements();
103 
104  for (unsigned int i = 0; i < numElements; i++)
105  {
106  out.Set(Activation(in.Get(), function, a, b));
107  ++in;
108  ++out;
109  }
110  in -= numElements;
111  out -= numElements;
112 }

◆ AllocateOutputData()

void armnn::AllocateOutputData ( unsigned int  numOutput,
unsigned int  numSelected,
const std::vector< float > &  boxCorners,
const std::vector< unsigned int > &  outputIndices,
const std::vector< unsigned int > &  selectedBoxes,
const std::vector< unsigned int > &  selectedClasses,
const std::vector< float > &  selectedScores,
float *  detectionBoxes,
float *  detectionScores,
float *  detectionClasses,
float *  numDetections 
)

Definition at line 102 of file DetectionPostProcess.cpp.

References numeric_cast().

Referenced by DetectionPostProcess().

113 {
114  for (unsigned int i = 0; i < numOutput; ++i)
115  {
116  unsigned int boxIndex = i * 4;
117  if (i < numSelected)
118  {
119  unsigned int boxCornorIndex = selectedBoxes[outputIndices[i]] * 4;
120  detectionScores[i] = selectedScores[outputIndices[i]];
121  detectionClasses[i] = armnn::numeric_cast<float>(selectedClasses[outputIndices[i]]);
122  detectionBoxes[boxIndex] = boxCorners[boxCornorIndex];
123  detectionBoxes[boxIndex + 1] = boxCorners[boxCornorIndex + 1];
124  detectionBoxes[boxIndex + 2] = boxCorners[boxCornorIndex + 2];
125  detectionBoxes[boxIndex + 3] = boxCorners[boxCornorIndex + 3];
126  }
127  else
128  {
129  detectionScores[i] = 0.0f;
130  detectionClasses[i] = 0.0f;
131  detectionBoxes[boxIndex] = 0.0f;
132  detectionBoxes[boxIndex + 1] = 0.0f;
133  detectionBoxes[boxIndex + 2] = 0.0f;
134  detectionBoxes[boxIndex + 3] = 0.0f;
135  }
136  }
137  numDetections[0] = armnn::numeric_cast<float>(numSelected);
138 }

◆ AllTypesAreEqualImpl() [1/2]

bool armnn::AllTypesAreEqualImpl ( )

Definition at line 59 of file LayerSupportRules.hpp.

Referenced by AllTypesAreEqualImpl(), and TypesAreEqual::TypesAreEqual().

60 {
61  return true;
62 }

◆ AllTypesAreEqualImpl() [2/2]

bool armnn::AllTypesAreEqualImpl ( T  t1,
T  t2,
Rest...  rest 
)

Definition at line 65 of file LayerSupportRules.hpp.

References AllTypesAreEqualImpl().

66 {
67  static_assert(std::is_same<T, TensorInfo>::value, "Type T must be a TensorInfo");
68 
69  return (t1.GetDataType() == t2.GetDataType()) && AllTypesAreEqualImpl(t2, rest...);
70 }

◆ Append() [1/2]

void armnn::Append ( Optimizer::Optimizations &  optimizations,
T &&  optimization 
)

Definition at line 30 of file Optimizer.hpp.

Referenced by Append(), and MakeOptimizations().

31 {
32  optimizations.emplace_back(new T(optimization));
33 };

◆ Append() [2/2]

void armnn::Append ( Optimizer::Optimizations &  optimizations,
Front &&  front,
Others &&...  others 
)

Definition at line 36 of file Optimizer.hpp.

References Append().

37 {
38  Append<Front>(optimizations, std::forward<Front>(front));
39  Append<Others...>(optimizations, std::forward<Others>(others)...);
40 };

◆ ApplyBackendOptimizations()

OptimizationResult armnn::ApplyBackendOptimizations ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
BackendsMap &  backends,
const ModelOptions &  modelOptions,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1139 of file Network.cpp.

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, AssignBackends(), CpuAcc, Layer::GetBackendId(), OptimizedNetworkImpl::GetGraph(), SubgraphView::GetIConnectableLayers(), Layer::GetType(), GpuAcc, Input, OptimizationResult::m_Error, BackendSettings::m_SelectedBackends, MakeOptimizations(), Output, Optimizer::Pass(), ReportWarning(), SubgraphViewSelector::SelectSubgraphs(), Graph::SubstituteSubgraph(), and Undefined.

Referenced by Optimize().

1144 {
1145  ARMNN_ASSERT(optNetObjPtr);
1146  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ApplyBackendOptimizations")
1147  OptimizationResult result;
1148 
1149  // Get the optimized graph
1150  Graph& optGraph = optNetObjPtr->GetGraph();
1151 
1152  // Run backend specific optimizations
1153  for (auto&& selectedBackend : backendSettings.m_SelectedBackends)
1154  {
1155  auto backendObjPtr = backends.find(selectedBackend)->second.get();
1156  ARMNN_ASSERT(backendObjPtr);
1157 
1158  if(selectedBackend == armnn::Compute::GpuAcc || selectedBackend == armnn::Compute::CpuAcc)
1159  {
1161  Optimizer::Pass(optGraph, MakeOptimizations(optimizations::FusePermuteIntoConstLayer()));
1162  }
1163 
1164  // Select sub-graphs based on backend
1165  SubgraphViewSelector::Subgraphs subgraphs =
1166  SubgraphViewSelector::SelectSubgraphs(optGraph,
1167  // Select layers assigned to the requested backend
1168  [&backendObjPtr](const Layer& layer)
1169  {
1170 
1171  return layer.GetType() != LayerType::Input &&
1172  layer.GetType() != LayerType::Output &&
1173  layer.GetBackendId() == backendObjPtr->GetId();
1174  });
1175  if (subgraphs.empty())
1176  {
1177  // No sub-graphs found, try with next selected backend
1178  continue;
1179  }
1180 
1181  // Try to optimize each sub-graph
1182  for (auto& subgraph : subgraphs)
1183  {
1184  // Try to optimize the current sub-graph
1185  ARMNN_SCOPED_PROFILING_EVENT(backendObjPtr->GetId(), "Optimizer_OptimizeSubgraph");
1186  OptimizationViews optimizationViews = backendObjPtr->OptimizeSubgraphView(*subgraph, modelOptions);
1187  ARMNN_ASSERT(optimizationViews.Validate(*subgraph));
1188 
1189  // Optimization attempted, check the resulting optimized sub-graph
1190  for (auto& substitution : optimizationViews.GetSubstitutions())
1191  {
1192  // Sub-graph optimized, substitute the sub-graph with the new optimized one in the main optimized graph
1193  SubgraphView& replacementSubgraph = substitution.m_ReplacementSubgraph;
1194  SubgraphView& substitutableSubgraph = substitution.m_SubstitutableSubgraph;
1195  optGraph.SubstituteSubgraph(substitutableSubgraph, replacementSubgraph);
1196 
1197  // Assign the current backend to the optimized sub-graph
1198  const SubgraphView::IConnectableLayers& subgraphLayers = replacementSubgraph.GetIConnectableLayers();
1199  std::for_each(subgraphLayers.begin(), subgraphLayers.end(), [&selectedBackend](IConnectableLayer* l)
1200  {
1201  ARMNN_ASSERT(l);
1202  PolymorphicDowncast<Layer*>(l)->SetBackendId(selectedBackend);
1203  });
1204  }
1205 
1206  if (!optimizationViews.GetFailedSubgraphs().empty())
1207  {
1208  std::stringstream warningMsg;
1209  warningMsg << "Some sub-graph(s) failed to optimized on " << backendObjPtr->GetId() << " backend.";
1210  ReportWarning(warningMsg.str(), errMessages);
1211 
1212  // Failed to optimize the given sub-graph, re-assign the sub-graph layers to other available backends
1213  BackendSettings settingsCopy(backendSettings);
1214  if (!backendObjPtr->GetId().IsCpuRef())
1215  {
1216  // Add the current backend to the list of backends to ignore
1217  settingsCopy.m_IgnoredBackends.insert(backendObjPtr->GetId());
1218  }
1219 
1220  int count=0;
1221  for (auto& failedSubgraph : optimizationViews.GetFailedSubgraphs())
1222  {
1223  // An error occurred: the optimization was attempted but not performed, try different backends
1224  std::stringstream subgraphMsg;
1225  subgraphMsg << "Re-assigning backends to " << failedSubgraph.GetIConnectableLayers().size()
1226  << " layers inside sub-graph " << count++;
1227  ReportWarning(subgraphMsg.str(), errMessages);
1228 
1229  OptimizationResult reassignmentResult = AssignBackends(optNetObjPtr,
1230  settingsCopy,
1231  *subgraph,
1232  errMessages);
1233  if (reassignmentResult.m_Error)
1234  {
1235  // Failed to re-assign one of the remaining backends to each layer of the sub-graph
1236  result.m_Error = true;
1237  return result;
1238  }
1239  }
1240  }
1241  }
1242  }
1243 
1244  return result;
1245 }
OptimizeForType< Layer, PermuteDepthwiseConv2dWeightsImpl > PermuteDepthwiseConv2dWeights
Optimizer::Optimizations MakeOptimizations(Args &&... args)
Definition: Optimizer.hpp:43
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
OptimizeForConnection< ConstantLayer, PermuteLayer, ConvertConstPermuteLayersToConstLayers > FusePermuteIntoConstLayer
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
GPU Execution: OpenCL: ArmCompute.
CPU Execution: NEON: ArmCompute.
void ReportWarning(const std::string &warningMessage, Optional< std::vector< std::string > &> warningMessages)
Definition: Network.cpp:568
OptimizationResult AssignBackends(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1106
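The machinery above is normally driven through the public armnn::Optimize() entry point rather than being called directly. Below is a minimal sketch, assuming a trivial input-to-output network and a runtime with at least the CpuRef backend available; backends in the preference list that are unavailable, or that fail to optimize a sub-graph, only add warnings to the messages vector while the affected layers fall back to the next backend.

#include <armnn/ArmNN.hpp>
#include <iostream>

int main()
{
    using namespace armnn;

    // Create a runtime and a trivial pass-through network.
    IRuntimePtr runtime = IRuntime::Create(IRuntime::CreationOptions());
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* input  = network->AddInputLayer(0);
    IConnectableLayer* output = network->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({1, 4}, DataType::Float32));

    // Backend preferences: GpuAcc and CpuAcc are tried first, CpuRef is the fallback.
    std::vector<std::string> messages; // collects warnings reported via ReportWarning()
    IOptimizedNetworkPtr optNet = Optimize(*network,
                                           {Compute::GpuAcc, Compute::CpuAcc, Compute::CpuRef},
                                           runtime->GetDeviceSpec(),
                                           OptimizerOptions(),
                                           Optional<std::vector<std::string>&>(messages));

    for (const std::string& msg : messages) { std::cout << msg << std::endl; }
    return optNet ? 0 : 1;
}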

◆ ArgMinMax() [1/3]

void ArgMinMax ( Decoder< float > &  in,
OUT *  out,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

Definition at line 16 of file ArgMinMax.cpp.

References Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), armnnUtils::GetUnsignedAxis(), IgnoreUnused(), Max, Min, and numeric_cast().

Referenced by TEST_SUITE().

18 {
19  IgnoreUnused(outputTensorInfo);
20 
21  unsigned int uAxis = armnnUtils::GetUnsignedAxis(inputTensorInfo.GetNumDimensions(), axis);
22 
23  const unsigned int outerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(), 0, uAxis);
24  const unsigned int axisSize = inputTensorInfo.GetShape()[uAxis];
25  const unsigned int innerElements = armnnUtils::GetNumElementsBetween(inputTensorInfo.GetShape(),
26  uAxis + 1,
27  inputTensorInfo.GetNumDimensions());
28 
29  for (unsigned int outer = 0; outer < outerElements; ++outer) {
30  for (unsigned int inner = 0; inner < innerElements; ++inner) {
31  in[outer * axisSize * innerElements + inner];
32  auto tmpValue = in.Get();
33  unsigned int tmpIndex = 0;
34  for (unsigned int i = 1; i < axisSize; ++i) {
35  in[(outer * axisSize * innerElements) + (i * innerElements) + inner];
36  const auto& value = in.Get();
37  if ((function == armnn::ArgMinMaxFunction::Min && value < tmpValue) ||
38  (function == armnn::ArgMinMaxFunction::Max && value > tmpValue)) {
39  tmpValue = value;
40  tmpIndex = i;
41  }
42  }
43 
44  out[outer * innerElements + inner] = armnn::numeric_cast<OUT>(tmpIndex);
45  }
46  }
47 }
unsigned int GetNumElementsBetween(const armnn::TensorShape &shape, unsigned int firstAxisInclusive, unsigned int lastAxisExclusive)
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
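As a worked example of the indexing above: for an input of shape [2, 3, 4] with axis = 1, uAxis is 1, outerElements is 2, axisSize is 3 and innerElements is 4. For each (outer, inner) pair the inner loop compares the three values at offsets (outer * 12) + (i * 4) + inner for i = 0..2, and the index of the winning value along the axis is written to out[outer * 4 + inner], so the output has shape [2, 4] with the reduced axis removed.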

◆ ArgMinMax() [2/3]

template void armnn::ArgMinMax ( Decoder< float > &  in,
int32_t *  out,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

◆ ArgMinMax() [3/3]

template void armnn::ArgMinMax ( Decoder< float > &  in,
int64_t *  out,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
ArgMinMaxFunction  function,
int  axis 
)

◆ ARMNN_DEPRECATED_MSG_REMOVAL_DATE() [1/2]

class armnn::ARMNN_DEPRECATED_MSG_REMOVAL_DATE ( "Use ABI stable IStrategy instead."  ,
"22.05"   
)

Function that an activation layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
activationDescriptor- ActivationDescriptor to configure the activation.
name- Optional name for the layer.

Function that an addition layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that an arg min max layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
argMinMaxDescriptor- ArgMinMaxDescriptor to configure the arg min max operation.
name- Optional name for the layer.

Function that a batch normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
mean- Pre-calculated mean for each channel.
variance- Pre-calculated variance for each channel.
beta- Per-channel additive factor.
gamma- Per-channel multiplicative factor.
name- Optional name for the layer.

Function that a batch to space ND layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
batchToSpaceNdDescriptor- Description of the layer.
name- Optional name for the layer.

Function a Comparison layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
comparisonDescriptor- Description of the layer.
name- Optional name for the layer.

Function that a concat layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
concatDescriptor- ConcatDescriptor (synonym for OriginsDescriptor) to configure the concatenation process. Number of Views must be equal to the number of inputs, and their order must match - e.g. first view corresponds to the first input, second view to the second input, etc....
name- Optional name for the layer.

Function a layer with no inputs and a single output, which always corresponds to the passed in constant tensor should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
input- Tensor to be provided as the only output of the layer. The layer will maintain its own copy of the tensor data, meaning the memory referenced by input can be freed or reused after this function is called.
name- Optional name for the layer.

Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
convolution2dDescriptor- Description of the 2D convolution layer.
name- Optional name for the layer.

Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
convolution2dDescriptor- Description of the 2D convolution layer.
weights- Tensor for the weights data.
biases- Optional tensor for the bias data. If specified, must match the output tensor shape.
name- Optional name for the layer.

Function a depth to space layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
depthToSpaceDescriptor- Parameters for the depth to space operation.
name- Optional name for the layer.

Function that a 2D depthwise convolution layer with biases should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
convolution2dDescriptor- Description of the 2D depthwise convolution layer.
name- Optional name for the layer.

Function that a 2D depthwise convolution layer with biases should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
convolution2dDescriptor- Description of the 2D depthwise convolution layer.
weights- Tensor for the weights. Expected format: [channelMultiplier, inputChannels, height, width].
biases- Optional tensor for the bias data. If specified, must match the output tensor shape.
name- Optional name for the layer.

Function that a Dequantize layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a Detection PostProcess layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
descriptor- Description of the Detection PostProcess layer.
anchors- Tensor for the anchors.
name- Optional name for the layer.

Function a division layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a ElementwiseUnary layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
elementwiseUnaryDescriptor- Description of the layer.
name- Optional name for the layer.

Function a fill layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
fillDescriptor- Description of the layer
name- Optional name for the layer.

Function a floor layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a fully connected layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
fullyConnectedDescriptor- Description of the fully connected layer.
name- Optional name for the layer.

Function that a fully connected layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
fullyConnectedDescriptor- Description of the fully connected layer.
weights- Tensor for the weights data.
biases- Optional tensor for the bias data.
name- Optional name for the layer.

Function a Gather layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
gatherDescriptor- Parameters for the gather operation.
name- Optional name for the layer.

Function that an InputLayer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
id- User generated id to uniquely identify a particular input. The same id needs to be specified when passing the inputs to the IRuntime::EnqueueWorkload() function.
name- Optional name for the layer.

Function that an instance normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
desc- Parameters for the instance normalization operation.
name- Optional name for the layer.

Function that an L2 normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked. Normalization is performed along dimension 1, but requires a 4d input.

Parameters
layer- pointer to the layer which is calling back to this visit function.
desc- Parameters for the L2 normalization operation.
name- Optional name for the layer.

Function that a log softmax layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
logSoftmaxDescriptor- LogSoftmaxDescriptor to configure the log softmax.
name- Optional name for the layer.

Function that a logical binary layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
logicalBinaryDescriptor- LogicalBinaryDescriptor to configure the logical binary layer.
name- Optional name for the layer.

Function an Lstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
descriptor- Parameters controlling the operation of the Lstm operation.
params- The weights and biases for the LSTM cell.
name- Optional name for the layer.

Function a Maximum layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a Mean layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
meanDescriptor- Parameters for the mean operation.
name- Optional name for the layer.

Function that a merge layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a Minimum layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a multiplication layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
normalizationDescriptor- NormalizationDescriptor to configure the normalization.
name- Optional name for the layer.

Function an output layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
id- User generated id to uniquely identify a particular output. The same id needs to be specified when passing the outputs to the IRuntime::EnqueueWorkload() function.
name- Optional name for the layer.

Function a pad layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
paddings- n by 2 tensor, where n is the rank of the input tensor, such that paddings[i,0] indicates the amount of padding to add in front of dimension i, and paddings[i,1] indicates the amount of padding to add after the end of dimension i
name- Optional name for the layer.

Function that a permute layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
permuteDescriptor- PermuteDescriptor to configure the permute.
name- Optional name for the layer.

Function that a pooling layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
pooling2dDescriptor- Pooling2dDescriptor to configure the pooling.
name- Optional name for the layer.

Function that a pooling layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
pooling3dDescriptor- Pooling3dDescriptor to configure the pooling.
name- Optional name for the layer.

Function that a PReLU activation layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a quantize layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a QLstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
descriptor- Parameters controlling the operation of the QLstm operation.
params- The weights and biases for the layer
name- Optional name for the layer.

Function a QuantizedLstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
params- The weights and biases for the Quantized LSTM cell
name- Optional name for the layer.

Function a rank layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a reduce layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
reduceDescriptor- Parameters for the reduce operation.
name- Optional name for the layer.

Function a reshape layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
reshapeDescriptor- Parameters for the reshape operation.
name- Optional name for the layer.

Function that a resize layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
resizeDescriptor- Parameters for the resize operation.
name- Optional name for the layer.

Function that a slice layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
sliceDescriptor- SliceDescriptor to configure the slice operation.
name- Optional name for the layer.

Function that a softmax layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
softmaxDescriptor- SoftmaxDescriptor to configure the softmax.
name- Optional name for the layer.

Function a space to batch layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
spaceToBatchNdDescriptor- Parameters for the space to batch operation.
name- Optional name for the layer.

Function a space to depth layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
spaceToDepthDescriptor- Parameters for the space to depth operation.
name- Optional name for the layer.

Function that a splitter layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
splitterDescriptor- ViewsDescriptor to configure the splitting process. Number of Views must be equal to the number of outputs, and their order must match - e.g. first view corresponds to the first output, second view to the second output, etc....
name- Optional name for the layer.

Function a stack layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
stackDescriptor- Parameters for the stack operation.
name- Optional name for the layer.

Function a StandInLayer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
standInDescriptor- Parameters for the stand-in layer.
name- Optional name for the layer.

Function a strided slice layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
stridedSliceDescriptor- Parameters for the strided slice operation.
name- Optional name for the layer.

Function a subtraction layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function a switch layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
name- Optional name for the layer.

Function that a 2D transpose convolution layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
descriptor- Description of the 2D transpose convolution layer.
weights- Tensor for the weights data.
biases- Optional tensor for the bias data.
name- Optional name for the layer.

Function that a transpose layer should call back to when its Accept(ILayerVisitor&) function is invoked.

Parameters
layer- pointer to the layer which is calling back to this visit function.
transposeDescriptor- TransposeDescriptor to configure the transpose.
name- Optional name for the layer.

Definition at line 16 of file ILayerVisitor.hpp.

References ARMNN_DEPRECATED_MSG, and ARMNN_DEPRECATED_MSG_REMOVAL_DATE().

17 {
18 protected:
19  ILayerVisitor() {}
20  virtual ~ILayerVisitor() {}
21 
22 public:
23 
24  /// Function that an activation layer should call back to when its Accept(ILayerVisitor&) function is invoked.
25  /// @param layer - pointer to the layer which is calling back to this visit function.
26  /// @param activationDescriptor - ActivationDescriptor to configure the activation.
27  /// @param name - Optional name for the layer.
28  virtual void VisitActivationLayer(const IConnectableLayer* layer,
29  const ActivationDescriptor& activationDescriptor,
30  const char* name = nullptr) = 0;
31 
32  /// Function that an addition layer should call back to when its Accept(ILayerVisitor&) function is invoked.
33  /// @param layer - pointer to the layer which is calling back to this visit function.
34  /// @param name - Optional name for the layer.
35  virtual void VisitAdditionLayer(const IConnectableLayer* layer,
36  const char* name = nullptr) = 0;
37 
38  /// Function that an arg min max layer should call back to when its Accept(ILayerVisitor&) function is invoked.
39  /// @param layer - pointer to the layer which is calling back to this visit function.
40  /// @param argMinMaxDescriptor - ArgMinMaxDescriptor to configure the arg min max operation.
41  /// @param name - Optional name for the layer.
42  virtual void VisitArgMinMaxLayer(const IConnectableLayer* layer,
43  const ArgMinMaxDescriptor& argMinMaxDescriptor,
44  const char* name = nullptr) = 0;
45 
46  /// Function that a batch normalization layer should call back to when its Accept(ILayerVisitor&)
47  /// function is invoked.
48  /// @param layer - pointer to the layer which is calling back to this visit function.
49  /// @param mean - Pre-calculated mean for each channel.
50  /// @param variance - Pre-calculated variance for each channel.
51  /// @param beta - Per-channel additive factor.
52  /// @param gamma - Per-channel multiplicative factor.
53  /// @param name - Optional name for the layer.
54  virtual void VisitBatchNormalizationLayer(const IConnectableLayer* layer,
55  const BatchNormalizationDescriptor& desc,
56  const ConstTensor& mean,
57  const ConstTensor& variance,
58  const ConstTensor& beta,
59  const ConstTensor& gamma,
60  const char* name = nullptr) = 0;
61 
62  /// Function that a batch to space ND layer should call back to when its Accept(ILayerVisitor&)
63  /// function is invoked.
64  /// @param layer - pointer to the layer which is calling back to this visit function.
65  /// @param batchToSpaceNdDescriptor - Description of the layer.
66  /// @param name - Optional name for the layer.
67  virtual void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
68  const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
69  const char* name = nullptr) = 0;
70 
71  /// Function a Comparison layer should call back to when its Accept(ILayerVisitor&) function is invoked.
72  /// @param layer - pointer to the layer which is calling back to this visit function.
73  /// @param comparisonDescriptor - Description of the layer.
74  /// @param name - Optional name for the layer.
75  virtual void VisitComparisonLayer(const IConnectableLayer* layer,
76  const ComparisonDescriptor& comparisonDescriptor,
77  const char* name = nullptr) = 0;
78 
79  /// Function that a concat layer should call back to when its Accept(ILayerVisitor&) function is invoked.
80  /// @param layer - pointer to the layer which is calling back to this visit function.
81  /// @param concatDescriptor - ConcatDescriptor (synonym for OriginsDescriptor) to configure the concatenation
82  /// process. Number of Views must be equal to the number of inputs, and their order
83  /// must match - e.g. first view corresponds to the first input, second view to the
84  /// second input, etc....
85  /// @param name - Optional name for the layer.
86  virtual void VisitConcatLayer(const IConnectableLayer* layer,
87  const OriginsDescriptor& concatDescriptor,
88  const char* name = nullptr) = 0;
89 
90  /// Function a layer with no inputs and a single output, which always corresponds to
91  /// the passed in constant tensor should call back to when its Accept(ILayerVisitor&) function is invoked.
92  /// @param layer - pointer to the layer which is calling back to this visit function.
93  /// @param input - Tensor to be provided as the only output of the layer. The layer will maintain
94  /// its own copy of the tensor data, meaning the memory referenced by @a input can
95  /// be freed or reused after this function is called.
96  /// @param name - Optional name for the layer.
97  virtual void VisitConstantLayer(const IConnectableLayer* layer,
98  const ConstTensor& input,
99  const char* name = nullptr) = 0;
100 
101  /// Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&)
102  /// function is invoked.
103  /// @param layer - pointer to the layer which is calling back to this visit function.
104  /// @param convolution2dDescriptor - Description of the 2D convolution layer.
105  /// @param name - Optional name for the layer.
106  virtual void VisitConvolution2dLayer(const IConnectableLayer* layer,
107  const Convolution2dDescriptor& convolution2dDescriptor,
108  const char* name = nullptr) = 0;
109 
110  /// Function that a 2D convolution layer should call back to when its Accept(ILayerVisitor&)
111  /// function is invoked.
112  /// @param layer - pointer to the layer which is calling back to this visit function.
113  /// @param convolution2dDescriptor - Description of the 2D convolution layer.
114  /// @param weights - Tensor for the weights data.
115  /// @param biases - Optional tensor for the bias data. If specified, must match the output tensor shape.
116  /// @param name - Optional name for the layer.
117  ARMNN_DEPRECATED_MSG("Use VisitConvolution2dLayer without ConstTensors")
118  virtual void VisitConvolution2dLayer(const IConnectableLayer* layer,
119  const Convolution2dDescriptor& convolution2dDescriptor,
120  const ConstTensor& weights,
121  const Optional<ConstTensor>& biases,
122  const char* name = nullptr) = 0;
123 
124  /// Function a depth to space layer should call back to when its Accept(ILayerVisitor&) function is invoked.
125  /// @param layer - pointer to the layer which is calling back to this visit function.
126  /// @param depthToSpaceDescriptor - Parameters for the depth to space operation.
127  /// @param name - Optional name for the layer.
128  virtual void VisitDepthToSpaceLayer(const IConnectableLayer* layer,
129  const DepthToSpaceDescriptor& depthToSpaceDescriptor,
130  const char* name = nullptr) = 0;
131 
132  /// Function that a 2D depthwise convolution layer with biases should call back to when its
133  /// Accept(ILayerVisitor&) function is invoked.
134  /// @param layer - pointer to the layer which is calling back to this visit function.
135  /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer.
136  /// @param name - Optional name for the layer.
137  virtual void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
138  const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
139  const char* name = nullptr) = 0;
140 
141  /// Function that a 2D depthwise convolution layer with biases should call back to when its
142  /// Accept(ILayerVisitor&) function is invoked.
143  /// @param layer - pointer to the layer which is calling back to this visit function.
144  /// @param convolution2dDescriptor - Description of the 2D depthwise convolution layer.
145  /// @param weights - Tensor for the weights. Expected format: [channelMultiplier, inputChannels, height, width].
146  /// @param biases - Optional tensor for the bias data. If specified, must match the output tensor shape.
147  /// @param name - Optional name for the layer.
148  ARMNN_DEPRECATED_MSG("Use VisitDepthwiseConvolution2dLayer without ConstTensors")
149  virtual void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
150  const DepthwiseConvolution2dDescriptor& convolution2dDescriptor,
151  const ConstTensor& weights,
152  const Optional<ConstTensor>& biases,
153  const char* name = nullptr) = 0;
154 
155  /// Function that a Dequantize layer should call back to when its
156  /// Accept(ILayerVisitor&) function is invoked.
157  /// @param layer - pointer to the layer which is calling back to this visit function.
158  /// @param name - Optional name for the layer.
159  virtual void VisitDequantizeLayer(const IConnectableLayer* layer,
160  const char* name = nullptr) = 0;
161 
162  /// Function that a Detection PostProcess layer should call back to when its
163  /// Accept(ILayerVisitor&) function is invoked.
164  /// @param layer - pointer to the layer which is calling back to this visit function.
165  /// @param descriptor - Description of the Detection PostProcess layer.
166  /// @param anchors - Tensor for the anchors.
167  /// @param name - Optional name for the layer.
168  virtual void VisitDetectionPostProcessLayer(const IConnectableLayer* layer,
169  const DetectionPostProcessDescriptor& descriptor,
170  const ConstTensor& anchors,
171  const char* name = nullptr) = 0;
172 
173  /// Function a division layer should call back to when its Accept(ILayerVisitor&) function is invoked.
174  /// @param layer - pointer to the layer which is calling back to this visit function.
175  /// @param name - Optional name for the layer.
176  virtual void VisitDivisionLayer(const IConnectableLayer* layer,
177  const char* name = nullptr) = 0;
178 
179  /// Function a ElementwiseUnary layer should call back to when its Accept(ILayerVisitor&) function is invoked.
180  /// @param layer - pointer to the layer which is calling back to this visit function.
181  /// @param elementwiseUnaryDescriptor - Description of the layer.
182  /// @param name - Optional name for the layer.
183  virtual void VisitElementwiseUnaryLayer(const IConnectableLayer* layer,
184  const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor,
185  const char* name = nullptr) = 0;
186 
187  /// Function a fill layer should call back to when its Accept(ILayerVisitor&) function is invoked.
188  /// @param layer - pointer to the layer which is calling back to this visit function.
189  /// @param fillDescriptor - Description of the layer
190  /// @param name - Optional name for the layer.
191  virtual void VisitFillLayer(const IConnectableLayer* layer,
192  const FillDescriptor& fillDescriptor,
193  const char* name = nullptr) = 0;
194 
195  /// Function a floor layer should call back to when its Accept(ILayerVisitor&) function is invoked.
196  /// @param layer - pointer to the layer which is calling back to this visit function.
197  /// @param name - Optional name for the layer.
198  virtual void VisitFloorLayer(const IConnectableLayer* layer,
199  const char* name = nullptr) = 0;
200 
201 
202  /// Function that a fully connected layer should call back to when its Accept(ILayerVisitor&)
203  /// function is invoked.
204  /// @param layer - pointer to the layer which is calling back to this visit function.
205  /// @param fullyConnectedDescriptor - Description of the fully connected layer.
206  /// @param name - Optional name for the layer.
207  virtual void VisitFullyConnectedLayer(const IConnectableLayer* layer,
208  const FullyConnectedDescriptor& fullyConnectedDescriptor,
209  const char* name = nullptr) = 0;
210 
211  /// Function that a fully connected layer should call back to when its Accept(ILayerVisitor&)
212  /// function is invoked.
213  /// @param layer - pointer to the layer which is calling back to this visit function.
214  /// @param fullyConnectedDescriptor - Description of the fully connected layer.
215  /// @param weights - Tensor for the weights data.
216  /// @param biases - Optional tensor for the bias data.
217  /// @param name - Optional name for the layer.
218  ARMNN_DEPRECATED_MSG_REMOVAL_DATE("Use VisitFullyConnectedLayer without ConstTensors", "22.05")
219  virtual void VisitFullyConnectedLayer(const IConnectableLayer* layer,
220  const FullyConnectedDescriptor& fullyConnectedDescriptor,
221  const ConstTensor& weights,
222  const Optional<ConstTensor>& biases,
223  const char* name = nullptr) = 0;
224 
225  /// Function a Gather layer should call back to when its Accept(ILayerVisitor&) function is invoked.
226  /// @param layer - pointer to the layer which is calling back to this visit function.
227  /// @param gatherDescriptor - Parameters for the gather operation.
228  /// @param name - Optional name for the layer.
229  virtual void VisitGatherLayer(const IConnectableLayer* layer,
230  const GatherDescriptor& gatherDescriptor,
231  const char* name = nullptr) = 0;
232 
233  /// Function that an InputLayer should call back to when its Accept(ILayerVisitor&) function is invoked.
234  /// @param layer - pointer to the layer which is calling back to this visit function.
235  /// @param id - User generated id to uniquely identify a particular input. The same id needs to be specified
236  /// when passing the inputs to the IRuntime::EnqueueWorkload() function.
237  /// @param name - Optional name for the layer.
238  virtual void VisitInputLayer(const IConnectableLayer* layer,
239  LayerBindingId id,
240  const char* name = nullptr) = 0;
241 
242  /// Function that an instance normalization layer should call back to when its Accept(ILayerVisitor&)
243  /// function is invoked.
244  /// @param layer - pointer to the layer which is calling back to this visit function.
245  /// @param desc - Parameters for the instance normalization operation.
246  /// @param name - Optional name for the layer.
247  virtual void VisitInstanceNormalizationLayer(const IConnectableLayer* layer,
248  const InstanceNormalizationDescriptor& desc,
249  const char* name = nullptr) = 0;
250 
251  /// Function that an L2 normalization layer should call back to when its Accept(ILayerVisitor&)
252  /// function is invoked. Normalization is performed along dimension 1, but requires a 4d input.
253  /// @param layer - pointer to the layer which is calling back to this visit function.
254  /// @param desc - Parameters for the L2 normalization operation.
255  /// @param name - Optional name for the layer.
256  virtual void VisitL2NormalizationLayer(const IConnectableLayer* layer,
257  const L2NormalizationDescriptor& desc,
258  const char* name = nullptr) = 0;
259 
260  /// Function that a log softmax layer should call back to when its Accept(ILayerVisitor&) function is invoked.
261  /// @param layer - pointer to the layer which is calling back to this visit function.
262  /// @param logSoftmaxDescriptor - LogSoftmaxDescriptor to configure the log softmax.
263  /// @param name - Optional name for the layer.
264  virtual void VisitLogSoftmaxLayer(const IConnectableLayer* layer,
265  const LogSoftmaxDescriptor& logSoftmaxDescriptor,
266  const char* name = nullptr) = 0;
267 
268  /// Function that a logical binary layer should call back to when its Accept(ILayerVisitor&) function is invoked.
269  /// @param layer - pointer to the layer which is calling back to this visit function.
270  /// @param logicalBinaryDescriptor - LogicalBinaryDescriptor to configure the logical binary layer.
271  /// @param name - Optional name for the layer.
272  virtual void VisitLogicalBinaryLayer(const IConnectableLayer* layer,
273  const LogicalBinaryDescriptor& logicalBinaryDescriptor,
274  const char* name = nullptr) = 0;
275 
276  /// Function an Lstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.
277  /// @param layer - pointer to the layer which is calling back to this visit function.
278  /// @param descriptor - Parameters controlling the operation of the Lstm operation.
279  /// @param params - The weights and biases for the LSTM cell.
280  /// @param name - Optional name for the layer.
281  virtual void VisitLstmLayer(const IConnectableLayer* layer,
282  const LstmDescriptor& descriptor,
283  const LstmInputParams& params,
284  const char* name = nullptr) = 0;
285 
286  /// Function a Maximum layer should call back to when its Accept(ILayerVisitor&) function is invoked.
287  /// @param layer - pointer to the layer which is calling back to this visit function.
288  /// @param name - Optional name for the layer.
289  virtual void VisitMaximumLayer(const IConnectableLayer* layer,
290  const char* name = nullptr) = 0;
291 
292  /// Function a Mean layer should call back to when its Accept(ILayerVisitor&) function is invoked.
293  /// @param layer - pointer to the layer which is calling back to this visit function.
294  /// @param meanDescriptor - Parameters for the mean operation.
295  /// @param name - Optional name for the layer.
296  virtual void VisitMeanLayer(const IConnectableLayer* layer,
297  const MeanDescriptor& meanDescriptor,
298  const char* name = nullptr) = 0;
299 
300  /// Function that a merge layer should call back to when its Accept(ILayerVisitor&) function is invoked.
301  /// @param layer - pointer to the layer which is calling back to this visit function.
302  /// @param name - Optional name for the layer.
303  virtual void VisitMergeLayer(const IConnectableLayer* layer,
304  const char* name = nullptr) = 0;
305 
306  /// Function a Minimum layer should call back to when its Accept(ILayerVisitor&) function is invoked.
307  /// @param layer - pointer to the layer which is calling back to this visit function.
308  /// @param name - Optional name for the layer.
309  virtual void VisitMinimumLayer(const IConnectableLayer* layer,
310  const char* name = nullptr) = 0;
311 
312  /// Function that a multiplication layer should call back to when its Accept(ILayerVisitor&) function is invoked.
313  /// @param layer - pointer to the layer which is calling back to this visit function.
314  /// @param name - Optional name for the layer.
315  virtual void VisitMultiplicationLayer(const IConnectableLayer* layer,
316  const char* name = nullptr) = 0;
317 
318  /// Function that a normalization layer should call back to when its Accept(ILayerVisitor&) function is invoked.
319  /// @param layer - pointer to the layer which is calling back to this visit function.
320  /// @param normalizationDescriptor - NormalizationDescriptor to configure the normalization.
321  /// @param name - Optional name for the layer.
322  virtual void VisitNormalizationLayer(const IConnectableLayer* layer,
323  const NormalizationDescriptor& normalizationDescriptor,
324  const char* name = nullptr) = 0;
325 
326  /// Function an output layer should call back to when its Accept(ILayerVisitor&) function is invoked.
327  /// @param layer - pointer to the layer which is calling back to this visit function.
328  /// @param id - User generated id to uniquely identify a particular output. The same id needs to be specified
329  /// when passing the outputs to the IRuntime::EnqueueWorkload() function.
330  /// @param name - Optional name for the layer.
331  virtual void VisitOutputLayer(const IConnectableLayer* layer,
332  LayerBindingId id,
333  const char* name = nullptr) = 0;
334 
335  /// Function a pad layer should call back to when its Accept(ILayerVisitor&) function is invoked.
336  /// @param layer - pointer to the layer which is calling back to this visit function.
337  /// @param paddings - n by 2 tensor, where n is the rank of the input tensor,
338  /// such that paddings[i,0] indicates the amount of padding to add in front of dimension i, and
339  /// paddings[i,1] indicates the amount of padding to add after the end of dimension i
340  /// @param name - Optional name for the layer.
341  virtual void VisitPadLayer(const IConnectableLayer* layer,
342  const PadDescriptor& padDescriptor,
343  const char* name = nullptr) = 0;
344 
345  /// Function that a permute layer should call back to when its Accept(ILayerVisitor&) function is invoked.
346  /// @param layer - pointer to the layer which is calling back to this visit function.
347  /// @param permuteDescriptor - PermuteDescriptor to configure the permute.
348  /// @param name - Optional name for the layer.
349  virtual void VisitPermuteLayer(const IConnectableLayer* layer,
350  const PermuteDescriptor& permuteDescriptor,
351  const char* name = nullptr) = 0;
352 
353  /// Function that a pooling layer should call back to when its Accept(ILayerVisitor&) function is invoked.
354  /// @param layer - pointer to the layer which is calling back to this visit function.
355  /// @param pooling2dDescriptor - Pooling2dDescriptor to configure the pooling.
356  /// @param name - Optional name for the layer.
357  virtual void VisitPooling2dLayer(const IConnectableLayer* layer,
358  const Pooling2dDescriptor& pooling2dDescriptor,
359  const char* name = nullptr) = 0;
360 
361  /// Function that a pooling layer should call back to when its Accept(ILayerVisitor&) function is invoked.
362  /// @param layer - pointer to the layer which is calling back to this visit function.
363  /// @param pooling3dDescriptor - Pooling3dDescriptor to configure the pooling.
364  /// @param name - Optional name for the layer.
365  virtual void VisitPooling3dLayer(const IConnectableLayer* layer,
366  const Pooling3dDescriptor& pooling3dDescriptor,
367  const char* name = nullptr) = 0;
368 
369  /// Function that a PReLU activation layer should call back to when its Accept(ILayerVisitor&) function is invoked.
370  /// @param layer - pointer to the layer which is calling back to this visit function.
371  /// @param name - Optional name for the layer.
372  virtual void VisitPreluLayer(const IConnectableLayer* layer,
373  const char* name = nullptr) = 0;
374 
375  /// Function a quantize layer should call back to when its Accept(ILayerVisitor&) function is invoked.
376  /// @param layer - pointer to the layer which is calling back to this visit function.
377  /// @param name - Optional name for the layer.
378  virtual void VisitQuantizeLayer(const IConnectableLayer* layer,
379  const char* name = nullptr) = 0;
380 
381  /// Function a QLstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.
382  /// @param layer - pointer to the layer which is calling back to this visit function.
383  /// @param descriptor - Parameters controlling the operation of the QLstm operation.
384  /// @param params - The weights and biases for the layer
385  /// @param name - Optional name for the layer.
386  virtual void VisitQLstmLayer(const IConnectableLayer* layer,
387  const QLstmDescriptor& descriptor,
388  const LstmInputParams& params,
389  const char* name = nullptr) = 0;
390 
391  /// Function a QuantizedLstm layer should call back to when its Accept(ILayerVisitor&) function is invoked.
392  /// @param layer - pointer to the layer which is calling back to this visit function.
393  /// @param params - The weights and biases for the Quantized LSTM cell
394  /// @param name - Optional name for the layer.
395  virtual void VisitQuantizedLstmLayer(const IConnectableLayer* layer,
396  const QuantizedLstmInputParams& params,
397  const char* name = nullptr) = 0;
398 
399  /// Function a rank layer should call back to when its Accept(ILayerVisitor&) function is invoked.
400  /// @param layer - pointer to the layer which is calling back to this visit function.
401  /// @param name - Optional name for the layer.
402  virtual void VisitRankLayer(const IConnectableLayer* layer,
403  const char* name = nullptr) = 0;
404 
405  /// Function that a reduce layer should call back to when its Accept(ILayerVisitor&) function is invoked.
406  /// @param layer - pointer to the layer which is calling back to this visit function.
407  /// @param reduceDescriptor - Parameters for the reduce operation.
408  /// @param name - Optional name for the layer.
409  virtual void VisitReduceLayer(const IConnectableLayer* layer,
410  const ReduceDescriptor& reduceDescriptor,
411  const char* name = nullptr) = 0;
412 
413  /// Function a reshape layer should call back to when its Accept(ILayerVisitor&) function is invoked.
414  /// @param layer - pointer to the layer which is calling back to this visit function.
415  /// @param reshapeDescriptor - Parameters for the reshape operation.
416  /// @param name - Optional name for the layer.
417  virtual void VisitReshapeLayer(const IConnectableLayer* layer,
418  const ReshapeDescriptor& reshapeDescriptor,
419  const char* name = nullptr) = 0;
420 
421  /// Function that a resize layer should call back to when its Accept(ILayerVisitor&) function is invoked.
422  /// @param layer - pointer to the layer which is calling back to this visit function.
423  /// @param resizeDescriptor - Parameters for the resize operation.
424  /// @param name - Optional name for the layer.
425  virtual void VisitResizeLayer(const IConnectableLayer* layer,
426  const ResizeDescriptor& resizeDescriptor,
427  const char* name = nullptr) = 0;
428 
429  /// Function that a slice layer should call back to when its Accept(ILayerVisitor&) function is invoked.
430  /// @param layer - pointer to the layer which is calling back to this visit function.
431  /// @param sliceDescriptor - SliceDescriptor to configure the slice operation.
432  /// @param name - Optional name for the layer.
433  virtual void VisitSliceLayer(const IConnectableLayer* layer,
434  const SliceDescriptor& sliceDescriptor,
435  const char* name = nullptr) = 0;
436 
437 
438  /// Function that a softmax layer should call back to when its Accept(ILayerVisitor&) function is invoked.
439  /// @param layer - pointer to the layer which is calling back to this visit function.
440  /// @param softmaxDescriptor - SoftmaxDescriptor to configure the softmax.
441  /// @param name - Optional name for the layer.
442  virtual void VisitSoftmaxLayer(const IConnectableLayer* layer,
443  const SoftmaxDescriptor& softmaxDescriptor,
444  const char* name = nullptr) = 0;
445 
446  /// Function a space to batch layer should call back to when its Accept(ILayerVisitor&) function is invoked.
447  /// @param layer - pointer to the layer which is calling back to this visit function.
448  /// @param spaceToBatchNdDescriptor - Parameters for the space to batch operation.
449  /// @param name - Optional name for the layer.
450  virtual void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
451  const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
452  const char* name = nullptr) = 0;
453 
454  /// Function a space to depth layer should call back to when its Accept(ILayerVisitor&) function is invoked.
455  /// @param layer - pointer to the layer which is calling back to this visit function.
456  /// @param spaceToDepthDescriptor - Parameters for the space to depth operation.
457  /// @param name - Optional name for the layer.
458  virtual void VisitSpaceToDepthLayer(const IConnectableLayer* layer,
459  const SpaceToDepthDescriptor& spaceToDepthDescriptor,
460  const char* name = nullptr) = 0;
461 
462  /// Function that a splitter layer should call back to when its Accept(ILayerVisitor&) function is invoked.
463  /// @param layer - pointer to the layer which is calling back to this visit function.
464  /// @param splitterDescriptor - ViewsDescriptor to configure the splitting process.
465  /// Number of Views must be equal to the number of outputs,
466  /// and their order must match - e.g. first view corresponds to
467  /// the first output, second view to the second output, etc....
468  /// @param name - Optional name for the layer.
469  virtual void VisitSplitterLayer(const IConnectableLayer* layer,
470  const ViewsDescriptor& splitterDescriptor,
471  const char* name = nullptr) = 0;
472 
473  /// Function a stack layer should call back to when its Accept(ILayerVisitor&) function is invoked.
474  /// @param layer - pointer to the layer which is calling back to this visit function.
475  /// @param stackDescriptor - Parameters for the stack operation.
476  /// @param name - Optional name for the layer.
477  virtual void VisitStackLayer(const IConnectableLayer* layer,
478  const StackDescriptor& stackDescriptor,
479  const char* name = nullptr) = 0;
480 
481  /// Function a StandInLayer should call back to when its Accept(ILayerVisitor&) function is invoked.
482  /// @param layer - pointer to the layer which is calling back to this visit function.
483  /// @param standInDescriptor - Parameters for the stand-in layer.
484  /// @param name - Optional name for the layer.
485  virtual void VisitStandInLayer(const IConnectableLayer* layer,
486  const StandInDescriptor& standInDescriptor,
487  const char* name = nullptr) = 0;
488 
489  /// Function a strided slice layer should call back to when its Accept(ILayerVisitor&) function is invoked.
490  /// @param layer - pointer to the layer which is calling back to this visit function.
491  /// @param stridedSliceDescriptor - Parameters for the strided slice operation.
492  /// @param name - Optional name for the layer.
493  virtual void VisitStridedSliceLayer(const IConnectableLayer* layer,
494  const StridedSliceDescriptor& stridedSliceDescriptor,
495  const char* name = nullptr) = 0;
496 
497  /// Function a subtraction layer should call back to when its Accept(ILayerVisitor&) function is invoked.
498  /// @param layer - pointer to the layer which is calling back to this visit function.
499  /// @param name - Optional name for the layer.
500  virtual void VisitSubtractionLayer(const IConnectableLayer* layer,
501  const char* name = nullptr) = 0;
502 
503  /// Function a switch layer should call back to when its Accept(ILayerVisitor&) function is invoked.
504  /// @param layer - pointer to the layer which is calling back to this visit function.
505  /// @param name - Optional name for the layer.
506  virtual void VisitSwitchLayer(const IConnectableLayer* layer,
507  const char* name = nullptr) = 0;
508 
509  /// Function that a 2D transpose convolution layer should call back to when its Accept(ILayerVisitor&)
510  /// function is invoked.
511  /// @param layer - pointer to the layer which is calling back to this visit function.
512  /// @param descriptor - Description of the 2D transpose convolution layer.
513  /// @param weights - Tensor for the weights data.
514  /// @param biases - Optional tensor for the bias data.
515  /// @param name - Optional name for the layer.
516  virtual void VisitTransposeConvolution2dLayer(const IConnectableLayer* layer,
517  const TransposeConvolution2dDescriptor& descriptor,
518  const ConstTensor& weights,
519  const Optional<ConstTensor>& biases,
520  const char* name = nullptr) = 0;
521 
522  /// Function that a transpose layer should call back to when its Accept(ILayerVisitor&) function is invoked.
523  /// @param layer - pointer to the layer which is calling back to this visit function.
524  /// @param transposeDescriptor - TransposeDescriptor to configure the transpose.
525  /// @param name - Optional name for the layer.
526  virtual void VisitTransposeLayer(const IConnectableLayer* layer,
527  const TransposeDescriptor& transposeDescriptor,
528  const char* name = nullptr) = 0;
529 
530  virtual void StartVisit() {}
531  virtual void FinishVisit() {}
532 
533 };
const armnnSerializer::Pooling2dDescriptor * Pooling2dDescriptor
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:290
#define ARMNN_DEPRECATED_MSG_REMOVAL_DATE(message, removed_in_release)
Definition: Deprecated.hpp:44
SoftmaxDescriptor LogSoftmaxDescriptor
A LogSoftmaxDescriptor for the LogSoftmaxLayer.
#define ARMNN_DEPRECATED_MSG(message)
Definition: Deprecated.hpp:43
SpaceToDepthDescriptor DepthToSpaceDescriptor
A DepthToSpaceDescriptor for the DepthToSpaceLayer.
const armnnSerializer::Pooling3dDescriptor * Pooling3dDescriptor
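As a rough usage sketch for this (deprecated) interface: the class below assumes the convenience base armnn::LayerVisitorBase<VisitorNoThrowPolicy> from armnn/LayerVisitorBase.hpp, which default-implements every pure virtual Visit function so that only the callbacks of interest need overriding. Whether INetwork::Accept(ILayerVisitor&) is still present depends on the release; new code should implement the ABI stable IStrategy instead.

#include <armnn/Deprecated.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/LayerVisitorBase.hpp> // assumed helper base providing no-op defaults for all Visit functions

ARMNN_NO_DEPRECATE_WARN_BEGIN
// Counts activation layers; every other layer type falls through to the no-op default.
class ActivationCounter : public armnn::LayerVisitorBase<armnn::VisitorNoThrowPolicy>
{
public:
    void VisitActivationLayer(const armnn::IConnectableLayer* /*layer*/,
                              const armnn::ActivationDescriptor& /*activationDescriptor*/,
                              const char* /*name*/ = nullptr) override
    {
        ++m_Count;
    }
    unsigned int m_Count = 0;
};
ARMNN_NO_DEPRECATE_WARN_END

// Usage (assuming 'network' is an INetworkPtr and Accept() has not yet been removed):
//   ActivationCounter counter;
//   network->Accept(counter);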

◆ ARMNN_DEPRECATED_MSG_REMOVAL_DATE() [2/2]

ARMNN_NO_DEPRECATE_WARN_BEGIN struct armnn::ARMNN_DEPRECATED_MSG_REMOVAL_DATE ( "ResizeBilinearQueueDescriptor is deprecated use ResizeQueueDescriptor instead"  ,
"22.08"   
)

◆ AssertNumberOfInputSlots()

void armnn::AssertNumberOfInputSlots ( Layer &  layer)

Definition at line 28 of file Layer.cpp.

References ARMNN_ASSERT, Convolution2d, DepthwiseConvolution2d, FullyConnected, Layer::GetNumInputSlots(), and Layer::GetType().

Referenced by InputSlot::Insert().

29 {
30  switch (layer.GetType())
31  {
32  case LayerType::Convolution2d:
33  case LayerType::DepthwiseConvolution2d:
34  case LayerType::FullyConnected:
35  {
36  ARMNN_ASSERT(layer.GetNumInputSlots() == 2 ||
37  layer.GetNumInputSlots() == 3);
38  break;
39  }
40  default:
41  {
42  ARMNN_ASSERT(layer.GetNumInputSlots() == 1);
43  break;
44  }
45  }
46 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.
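The 2-or-3 slot assertion above reflects the newer API in which weights and bias reach these layer types as ConstantLayer inputs rather than embedded ConstTensors. A hedged sketch, assuming the descriptor-only AddConvolution2dLayer overload for constant-weight inputs:

#include <armnn/ArmNN.hpp>
#include <vector>

int main()
{
    using namespace armnn;
    INetworkPtr network = INetwork::Create();

    Convolution2dDescriptor convDesc;   // default-constructed; configure strides/padding as needed
    convDesc.m_BiasEnabled = false;

    // A dummy 1x1x1x1 constant weights tensor, fed in through input slot 1.
    TensorInfo weightsInfo({1, 1, 1, 1}, DataType::Float32, 0.0f, 0, true);
    std::vector<float> weightsData = {1.0f};
    ConstTensor weightsTensor(weightsInfo, weightsData);

    IConnectableLayer* conv    = network->AddConvolution2dLayer(convDesc, "conv");
    IConnectableLayer* weights = network->AddConstantLayer(weightsTensor, "weights");
    weights->GetOutputSlot(0).Connect(conv->GetInputSlot(1)); // slot 0 carries the activation data

    // With m_BiasEnabled = true a third ConstantLayer would connect to slot 2,
    // giving the 2 or 3 input slots that AssertNumberOfInputSlots() checks.
    return 0;
}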

◆ AssignBackends() [1/3]

OptimizationResult AssignBackends ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
Graph::Iterator &  firstLayer,
Graph::Iterator &  lastLayer,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1018 of file Network.cpp.

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), BackendSettings::GetAvailablePreferredBackends(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.

Referenced by ApplyBackendOptimizations(), AssignBackends(), Optimize(), and TEST_SUITE().

1023 {
1024  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1025  OptimizationResult result;
1026 
1027  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1028  if (availablePreferredBackends.empty())
1029  {
1030  std::stringstream failureMsg;
1031  failureMsg << "No preferred backends are available";
1032  ReportError(failureMsg.str(), errMessages);
1033 
1034  result.m_Error = true;
1035  return result;
1036  }
1037 
1038  for (auto it = firstLayer; it != lastLayer; ++it)
1039  {
1040  AssignBackendsIConnectable(optNetObjPtr,
1041  *it,
1042  errMessages,
1043  result,
1044  backendSettings,
1045  availablePreferredBackends);
1046  }
1047 
1048  for (auto it = firstLayer; it != lastLayer; ++it)
1049  {
1050  auto layer = PolymorphicDowncast<Layer*>(*it);
1051 
1052  if(layer->GetType() == LayerType::Input)
1053  {
1054  BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1055  layer->SetBackendId(connectedBackendId);
1056  }
1057  }
1058 
1059  return result;
1060 }
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
void AssignBackendsIConnectable(OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > &> errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
Definition: Network.cpp:905

◆ AssignBackends() [2/3]

OptimizationResult AssignBackends ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
SubgraphView::IConnectableLayerIterator &  firstLayer,
SubgraphView::IConnectableLayerIterator &  lastLayer,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1062 of file Network.cpp.

References ARMNN_SCOPED_PROFILING_EVENT, AssignBackendsIConnectable(), BackendSettings::GetAvailablePreferredBackends(), Input, OptimizationResult::m_Error, ReportError(), and Undefined.

1067 {
1068  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AssignBackends");
1069  OptimizationResult result;
1070 
1071  auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
1072  if (availablePreferredBackends.empty())
1073  {
1074  std::stringstream failureMsg;
1075  failureMsg << "No preferred backends are available";
1076  ReportError(failureMsg.str(), errMessages);
1077 
1078  result.m_Error = true;
1079  return result;
1080  }
1081 
1082  for (auto it = firstLayer; it != lastLayer; ++it)
1083  {
1084  AssignBackendsIConnectable(optNetObjPtr,
1085  *it,
1086  errMessages,
1087  result,
1088  backendSettings,
1089  availablePreferredBackends);
1090  }
1091 
1092  for (auto it = firstLayer; it != lastLayer; ++it)
1093  {
1094  auto layer = PolymorphicDowncast<Layer*>(*it);
1095 
1096  if(layer->GetType() == LayerType::Input)
1097  {
1098  BackendId connectedBackendId = layer->GetOutputSlot(0).GetConnection(0)->GetOwningLayer().GetBackendId();
1099  layer->SetBackendId(connectedBackendId);
1100  }
1101  }
1102 
1103  return result;
1104 }
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
void AssignBackendsIConnectable(OptimizedNetworkImpl *optNetObjPtr, IConnectableLayer *it, Optional< std::vector< std::string > &> errMessages, OptimizationResult &result, BackendSettings &backendSettings, std::vector< BackendId > &availablePreferredBackends)
Definition: Network.cpp:905

◆ AssignBackends() [3/3]

OptimizationResult armnn::AssignBackends ( OptimizedNetworkImpl *  optNetObjPtr,
BackendSettings &  backendSettings,
SubgraphView &  subgraph,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1106 of file Network.cpp.

References AssignBackends(), SubgraphView::beginIConnectable(), and SubgraphView::endIConnectable().

1110 {
1111  SubgraphView::IConnectableLayerIterator firstLayer = subgraph.beginIConnectable();
1112  SubgraphView::IConnectableLayerIterator lastLayer = subgraph.endIConnectable();
1113  return AssignBackends(optNetObjPtr,
1114  backendSettings,
1115  firstLayer,
1116  lastLayer,
1117  errMessages);
1118 }
OptimizationResult AssignBackends(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1106

◆ AssignBackendsIConnectable()

void armnn::AssignBackendsIConnectable ( OptimizedNetworkImpl *  optNetObjPtr,
IConnectableLayer *  it,
Optional< std::vector< std::string > &>  errMessages,
OptimizationResult &  result,
BackendSettings &  backendSettings,
std::vector< BackendId > &  availablePreferredBackends 
)

Definition at line 905 of file Network.cpp.

References ARMNN_ASSERT_MSG, AttemptBackendAssignment(), CheckScaleSetOnQuantizedType(), Constant, CpuRef, Float32, OptimizedNetworkImpl::GetGraph(), Input, BackendSettings::IsBackendSupported(), BackendSettings::IsCpuRefUsed(), OptimizationResult::IsError(), OptimizationResult::IsOk(), OptimizationResult::IsWarningOnly(), OptimizationResult::m_Error, BackendSettings::m_SelectedBackends, MemCopy, Permute, and ReturnWithError().

Referenced by AssignBackends().

911 {
912  auto ReturnError = [&](const Layer* layer)
913  {
914  return ReturnWithError(result, layer, backendSettings, errMessages);
915  };
916 
917  auto layer = PolymorphicDowncast<Layer*>(it);
918 
919  if (layer->GetType() == LayerType::Input)
920  {
921  return;
922  }
923 
924  DataType dataTypeIn = layer->GetNumInputSlots() == 0 ? DataType::Float32 :
925  layer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType();
926  DataType dataTypeOut = layer->GetNumOutputSlots() == 0 ? DataType::Float32 :
927  layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
928 
929  std::string reasonIfUnsupported;
930  bool found = false;
931  if (!CheckScaleSetOnQuantizedType(layer, errMessages))
932  {
933  // don't bomb immediately, find all the quantized outputs
934  // which haven't had a scale set and report them all back.
935  result.m_Error = true;
936  }
937 
938  // First try assign layer to hint backend
939  if (layer->GetBackendHint().has_value() &&
940  backendSettings.IsBackendSupported(layer->GetBackendHint().value()) &&
941  AttemptBackendAssignment(backendSettings,
942  optNetObjPtr->GetGraph(),
943  layer,
944  layer->GetBackendHint().value(),
945  dataTypeIn,
946  dataTypeOut,
947  availablePreferredBackends,
948  reasonIfUnsupported,
949  errMessages).IsOk())
950  {
951  found = true;
952  backendSettings.m_SelectedBackends.insert(layer->GetBackendHint().value());
953  }
954  else
955  {
956  // Try assign layer to prefered list of backends
957  for (const auto& backend : availablePreferredBackends)
958  {
959  if (layer->GetBackendHint().has_value() &&
960  layer->GetBackendHint().value() == backend)
961  {
962  continue; //Don't re-test the backend hint
963  }
964 
965  OptimizationResult res = AttemptBackendAssignment(backendSettings,
966  optNetObjPtr->GetGraph(),
967  layer,
968  backend,
969  dataTypeIn,
970  dataTypeOut,
971  availablePreferredBackends,
972  reasonIfUnsupported,
973  errMessages);
974 
975  if (res.IsOk())
976  {
977  found = true;
978  backendSettings.m_SelectedBackends.insert(backend);
979  break;
980  }
981  else if (res.IsError())
982  {
983  result = res; // Cannot continue.
984  // Note: we don't need to log the error as it would already
985  // be logged in AttemptBackendAssignment().
986  }
987  else
988  {
989  ARMNN_ASSERT_MSG(res.IsWarningOnly(), "OptimizationResult in unexpected state.");
990  }
991  }
992  }
993 
994  // If the layer is unsupported by any devices, log and return a null network.
995  if (!found)
996  {
997  // NOTE: if the layer is not an operation queue type AND we have not got CpuRef as a
998  // fallback we should set the compute device on the layer to CpuRef (these are not
999  // available as accelerated operations, or are only available under certain
1000  // conditions, currently they comprise MemCopy, Constant, Permute)
1001  armnn::LayerType layerType = layer->GetType();
1002  if (!backendSettings.IsCpuRefUsed() && (layerType == armnn::LayerType::MemCopy ||
1003  layerType == armnn::LayerType::Constant ||
1004  layerType == armnn::LayerType::Permute))
1005  {
1006  BackendId cpuBackendId(armnn::Compute::CpuRef);
1007  layer->SetBackendId(cpuBackendId);
1008  backendSettings.m_SelectedBackends.insert(cpuBackendId);
1009  }
1010  else
1011  {
1012  result = ReturnError(layer);
1013  }
1014  }
1015 
1016 }
CPU Execution: Reference C++ kernels.
OptimizationResult ReturnWithError(OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:580
DataType
Definition: Types.hpp:48
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
bool CheckScaleSetOnQuantizedType(Layer *layer, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:595
OptimizationResult AttemptBackendAssignment(BackendSettings &backendSettings, Graph &graph, Layer *layer, BackendId backend, DataType dataTypeIn, DataType dataTypeOut, const std::vector< BackendId > &availablePreferredBackends, std::string &reasonIfUnsupported, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:654
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ AssignSplitId()

void armnn::AssignSplitId ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo 
)

Definition at line 309 of file SubgraphViewSelector.cpp.

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

310 {
311  // Check each input to see if we can attach ourselves to any of the subgraphs that have already been assigned.
312  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
313  {
314  // We can only attach ourselves to the subgraph from this input if there isn't a cut here.
315  if (layerInfo.m_IsSelected == parentInfo.m_IsSelected)
316  {
317  // We also need to check that merging into this subgraph won't cause a dependency cycle between subgraphs.
318  // This will be the case if the subgraph that we will become part of is already a dependency
319  // of one of the subgraphs that are input to this layer, e.g:
320  //
321  // 0 | The numbers (0, 1) are the subgraph IDs of each layer and we are looking at layer X.
322  // / \ |
323  // 1 0 | We can't merge X into subgraph 0, because the left-hand input already depends on subgraph 0.
324  // \ / | We can however merge X into subgraph 1.
325  // X |
326  //
327  bool dependenciesOk = true;
328  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& otherParentInfo)
329  {
330  // We call HasAntecedent() ~ n^2 times, where n is the number of inputs to this layer.
331  // Hence it is important that this is efficient - see PartialSubgraph class description.
332  if (otherParentInfo.m_Subgraph->HasAntecedent(parentInfo.m_Subgraph.get()))
333  {
334  dependenciesOk = false;
335  }
336  });
337 
338  if (dependenciesOk)
339  {
340  // Merge into the subgraph of this input. If we have already been merged into another subgraph
341  // (from another input of this layer), then merge both of them together.
342  if (layerInfo.m_Subgraph == nullptr)
343  {
344  layerInfo.m_Subgraph = parentInfo.m_Subgraph;
345  }
346  else
347  {
348  // We call MergeWith() ~ n times, where n is the number of inputs to this layer.
349  // Therefore it does not need to be as performant as HasAntecedent().
350  layerInfo.m_Subgraph->MergeWith(parentInfo.m_Subgraph.get());
351  }
352  }
353  }
354  });
355 
356  // If we weren't able to merge into an existing subgraph then we need to make a new one
357  if (layerInfo.m_Subgraph == nullptr)
358  {
359  layerInfo.m_Subgraph = std::make_shared<PartialSubgraph>();
360  }
361 
362  // Record dependencies of the chosen subgraph based on the inputs of this layer.
363  ForEachLayerInput(layerInfos, layerInfo, [&](LayerSelectionInfo& parentInfo)
364  {
365  // These functions are called ~n times, where n is the number of inputs to this layer.
366  // Therefore it does not need to be as performant as HasAntecedent().
367  if (!layerInfo.m_Subgraph->IsMergedWith(parentInfo.m_Subgraph.get()))
368  {
369  layerInfo.m_Subgraph->AddDirectAntecedent(parentInfo.m_Subgraph.get());
370  }
371  });
372 }
void ForEachLayerInput(LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)

◆ AttemptBackendAssignment()

OptimizationResult armnn::AttemptBackendAssignment ( BackendSettings &  backendSettings,
Graph &  graph,
Layer *  layer,
BackendId  backend,
DataType  dataTypeIn,
DataType  dataTypeOut,
const std::vector< BackendId > &  availablePreferredBackends,
std::string &  reasonIfUnsupported,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 654 of file Network.cpp.

References BFloat16, Constant, ConvertBf16ToFp32, FloatingPointConverter::ConvertFloat16To32(), ConvertFp16ToFp32, ConvertFp32ToBf16, ConvertFp32ToFp16, Convolution2d, Float16, Float32, FullyConnected, BackendId::Get(), Layer::GetBackendId(), GetDataTypeName(), Layer::GetInputSlots(), GetLayerTypeAsCString(), Layer::GetOutputSlot(), Layer::GetType(), info, InsertConvertBf16ToFp32LayersBefore(), InsertConvertFp16ToFp32LayersBefore(), InsertConvertFp32ToBf16LayersAfter(), InsertConvertFp32ToFp16LayersAfter(), IWorkloadFactory::IsLayerSupported(), ConstantLayer::m_LayerOutput, ReportWarning(), ReturnWithError(), Layer::SetBackendId(), and OutputSlot::SetTensorInfo().

Referenced by AssignBackendsIConnectable().

663 {
664  OptimizationResult result;
665 
666  // Helper lambda to compose meaningful error message before returning with error
667  auto ReturnError = [&](const Layer* layer)
668  {
669  return ReturnWithError(result, layer, backendSettings, errMessages);
670  };
671 
672  // need to set the compute device on the layer
673  // before we can check if it is supported
674  layer->SetBackendId(backend);
675  if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported))
676  {
677  if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
678  {
679  if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
680  && layer->GetType() != LayerType::ConvertFp32ToFp16
681  && layer->GetType() != LayerType::ConvertFp16ToFp32)
682  {
683  auto ConstantLayerFromFp16ToFp32 = [](Layer& layer)
684  {
685  if (layer.GetType() == LayerType::Constant)
686  {
687  ConstantLayer* constantLayer = PolymorphicDowncast<ConstantLayer*>(&layer);
688 
689  auto& info = constantLayer->m_LayerOutput->GetTensorInfo();
690 
691  if (info.GetDataType() == DataType::Float16)
692  {
693  std::vector<float> newValues(info.GetNumElements());
694 
695  FloatingPointConverter::ConvertFloat16To32(
696  constantLayer->m_LayerOutput->GetConstTensor<Half>(),
697  info.GetNumElements(),
698  newValues.data());
699 
700  TensorInfo newInfo(info);
701  newInfo.SetDataType(DataType::Float32);
702  ConstTensor newInput(newInfo, newValues);
703  constantLayer->m_LayerOutput.reset(new ScopedTensorHandle(newInput));
704 
705  layer.GetOutputSlot(0).SetTensorInfo(newInfo);
706  }
707  }
708  };
709 
710  bool checkType = false;
711 
712  for (auto inputSlot : layer->GetInputSlots())
713  {
714  auto connectedOutputSlot = inputSlot.GetConnectedOutputSlot();
715  if (connectedOutputSlot->GetOwningLayer().GetType() == LayerType::Constant)
716  {
717  if (connectedOutputSlot->GetNumConnections() == 1)
718  {
719  checkType = true;
720  ConstantLayerFromFp16ToFp32(connectedOutputSlot->GetOwningLayer());
721  }
722  }
723  }
724 
725  // Insert FP16 -> FP32 conversion layer before current layer
726  std::vector<ConvertFp16ToFp32Layer*> convertFp16ToFp32Layers;
727  if (dataTypeIn == DataType::Float16)
728  {
729  convertFp16ToFp32Layers =
730  InsertConvertFp16ToFp32LayersBefore(graph, *layer, checkType);
731  }
732 
733  // Insert FP32 -> FP16 conversion layer after current layer
734  std::vector<ConvertFp32ToFp16Layer*> convertFp32ToFp16Layers;
735  if (dataTypeOut == DataType::Float16)
736  {
737  convertFp32ToFp16Layers =
738  InsertConvertFp32ToFp16LayersAfter(graph, *layer);
739  }
740 
741  // Assign a supported backend to the newly introduced conversion layers
742  auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
743  {
744  bool supportedBackendFound = false;
745  std::string reasonIfUnsupported;
746 
747  // Try preferred backend first
748  layer->SetBackendId(preferredBackend);
749  if (IWorkloadFactory::IsLayerSupported(*layer,
750  EmptyOptional(),
751  reasonIfUnsupported))
752  {
753  supportedBackendFound = true;
754  }
755  else
756  {
757  for (const auto& backend : availablePreferredBackends)
758  {
759  // Skip preferred backend (we already determined that it is not supported)
760  if (backend == preferredBackend)
761  {
762  continue;
763  }
764 
765  layer->SetBackendId(backend);
766  if (IWorkloadFactory::IsLayerSupported(*layer,
767  EmptyOptional(),
768  reasonIfUnsupported))
769  {
770  supportedBackendFound = true;
771  break;
772  }
773  }
774  }
775 
776  return supportedBackendFound;
777  };
778 
779  for (ConvertFp16ToFp32Layer* convertLayer : convertFp16ToFp32Layers)
780  {
781  if (!AssignFirstSupportedBackend(convertLayer, backend))
782  {
783  return ReturnError(convertLayer);
784  }
785  }
786 
787  for (ConvertFp32ToFp16Layer* convertLayer : convertFp32ToFp16Layers)
788  {
789  if (!AssignFirstSupportedBackend(convertLayer, backend))
790  {
791  return ReturnError(convertLayer);
792  }
793  }
794 
795  return result;
796  }
797  }
798  else if (dataTypeIn == DataType::BFloat16 || dataTypeOut == DataType::BFloat16)
799  {
800  if (IWorkloadFactory::IsLayerSupported(*layer, DataType::Float32, reasonIfUnsupported)
801  && layer->GetType() != LayerType::ConvertFp32ToBf16
802  && layer->GetType() != LayerType::ConvertBf16ToFp32)
803  {
804  // Insert BF16 -> FP32 conversion layer before current layer
805  std::vector<ConvertBf16ToFp32Layer*> convertBf16ToFp32Layers;
806  if (dataTypeIn == DataType::BFloat16)
807  {
808  convertBf16ToFp32Layers =
809  InsertConvertBf16ToFp32LayersBefore(graph, *layer);
810  if (layer->GetType() == LayerType::Convolution2d)
811  {
812  ConvertBf16ToFp32Weight<Convolution2dLayer>(layer);
813  }
814  else if (layer->GetType() == LayerType::FullyConnected)
815  {
816  ConvertBf16ToFp32Weight<FullyConnectedLayer>(layer);
817  }
818  }
819 
820  // Insert FP32 -> BF16 conversion layer after current layer
821  std::vector<ConvertFp32ToBf16Layer*> convertFp32ToBf16Layers;
822  if (dataTypeOut == DataType::BFloat16)
823  {
824  convertFp32ToBf16Layers =
825  InsertConvertFp32ToBf16LayersAfter(graph, *layer);
826  }
827 
828  // Assign a supported backend to the newly introduced conversion layers
829  auto AssignFirstSupportedBackend = [&](Layer* layer, BackendId preferredBackend)
830  {
831  bool supportedBackendFound = false;
832  std::string reasonIfUnsupported;
833 
834  // Try preferred backend first
835  layer->SetBackendId(preferredBackend);
836  if (IWorkloadFactory::IsLayerSupported(*layer,
837  EmptyOptional(),
838  reasonIfUnsupported))
839  {
840  supportedBackendFound = true;
841  }
842  else
843  {
844  for (const auto& backend : availablePreferredBackends)
845  {
846  // Skip preferred backend (we already determined that it is not supported)
847  if (backend == preferredBackend)
848  {
849  continue;
850  }
851 
852  layer->SetBackendId(backend);
853  if (IWorkloadFactory::IsLayerSupported(*layer,
854  EmptyOptional(),
855  reasonIfUnsupported))
856  {
857  supportedBackendFound = true;
858  break;
859  }
860  }
861  }
862 
863  return supportedBackendFound;
864  };
865 
866  for (ConvertBf16ToFp32Layer* convertLayer : convertBf16ToFp32Layers)
867  {
868  if (!AssignFirstSupportedBackend(convertLayer, backend))
869  {
870  return ReturnError(convertLayer);
871  }
872  }
873 
874  for (ConvertFp32ToBf16Layer* convertLayer : convertFp32ToBf16Layers)
875  {
876  if (!AssignFirstSupportedBackend(convertLayer, backend))
877  {
878  return ReturnError(convertLayer);
879  }
880  }
881 
882  return result;
883  }
884  }
885 
886  std::stringstream warningMsg;
887  warningMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
888  << " is not supported on requested backend " << layer->GetBackendId().Get()
889  << " for input data type " << GetDataTypeName(dataTypeIn)
890  << " and output data type " << GetDataTypeName(dataTypeOut)
891  << " (reason: " << reasonIfUnsupported
892  << "), falling back to the next backend.";
893  ReportWarning(warningMsg.str(), errMessages);
894 
895  return OptimizationResult(true, false);
896  }
897  else
898  {
899  return result;
900  }
901 }
bool IsLayerSupported(const armnn::Layer *layer)
Definition: MockBackend.cpp:60
std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter(Graph &graph, Layer &layer)
std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore(Graph &graph, Layer &layer, bool expectCorrectInputType)
OptimizationResult ReturnWithError(OptimizationResult res, const Layer *layer, const BackendSettings &backendSettings, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:580
constexpr const char * GetDataTypeName(DataType dataType)
Definition: TypesUtils.hpp:202
std::vector< ConvertBf16ToFp32Layer * > InsertConvertBf16ToFp32LayersBefore(Graph &graph, Layer &layer, bool expectCorrectInputType)
static void ConvertFloat16To32(const void *srcFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersAfter(Graph &graph, Layer &layer)
void ReportWarning(const std::string &warningMessage, Optional< std::vector< std::string > &> warningMessages)
Definition: Network.cpp:568
half_float::half Half
Definition: Half.hpp:18
const char * GetLayerTypeAsCString(LayerType type)
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.

◆ BackendRegistryInstance()

◆ BatchNormImpl()

void BatchNormImpl ( const BatchNormalizationQueueDescriptor &  data,
Decoder< float > &  meanDecoder,
Decoder< float > &  varianceDecoder,
Decoder< float > &  betaDecoder,
Decoder< float > &  gammaDecoder,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 18 of file BatchNormImpl.cpp.

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), GetTensorInfo(), DataLayoutIndexed::GetWidthIndex(), BatchNormalizationDescriptor::m_DataLayout, BatchNormalizationDescriptor::m_Eps, QueueDescriptor::m_Inputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

Referenced by RefBatchNormalizationWorkload::ExecuteAsync().

25 {
26  const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
27  const TensorShape inputShape = inputInfo.GetShape();
28 
29  armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
30 
31  unsigned int inputBatches = inputShape[0];
32  unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
33  unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
34  unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
35 
36  for (unsigned int c = 0; c < inputChannels; c++)
37  {
38  meanDecoder[c];
39  varianceDecoder[c];
40  betaDecoder[c];
41  gammaDecoder[c];
42  float mean = meanDecoder.Get();
43  float var = varianceDecoder.Get();
44  float beta = betaDecoder.Get();
45  float gamma = gammaDecoder.Get();
46 
47  float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
48  float add = beta - mult * mean;
49 
50  for (unsigned int n = 0; n < inputBatches; n++)
51  {
52  for (unsigned int h = 0; h < inputHeight; h++)
53  {
54  for (unsigned int w = 0; w < inputWidth; w++)
55  {
56  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
57  inputDecoder[index];
58  outputEncoder[index];
59  outputEncoder.Set(mult * inputDecoder.Get() + add);
60  }
61  }
62  }
63  }
64 }
virtual void Set(IType right)=0
virtual IType Get() const =0
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout, const armnn::DataType dataType)
Definition: TensorUtils.cpp:38
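
The per-element work above folds the standard batch-normalization formula y = gamma * (x - mean) / sqrt(var + eps) + beta into a single multiply-add per channel (mult and add). A minimal, self-contained sketch of that arithmetic with illustrative values and none of the Decoder/Encoder plumbing:

#include <cmath>
#include <cstdio>

int main()
{
    // Illustrative per-channel statistics and parameters.
    const float mean  = 0.5f;
    const float var   = 4.0f;
    const float beta  = 1.0f;   // learned offset
    const float gamma = 2.0f;   // learned scale
    const float eps   = 1e-5f;  // m_Eps in the descriptor

    // Same folding as BatchNormImpl: y = mult * x + add.
    const float mult = gamma / std::sqrt(var + eps);
    const float add  = beta - mult * mean;

    const float x = 3.0f;
    const float y = mult * x + add;   // equals gamma * (x - mean) / sqrt(var + eps) + beta

    std::printf("normalized value: %f\n", y);
    return 0;
}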

◆ BatchToSpaceNd()

void BatchToSpaceNd ( const DataLayoutIndexed &  dataLayout,
const TensorInfo &  inputTensorInfo,
const TensorInfo &  outputTensorInfo,
const std::vector< unsigned int > &  blockShape,
const std::vector< std::pair< unsigned int, unsigned int >> &  cropsData,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 35 of file BatchToSpaceNd.cpp.

References ARMNN_ASSERT_MSG, BatchToSpaceNd(), Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumDimensions(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Offset(), and Encoder< IType >::Set().

Referenced by BatchToSpaceNd(), BatchToSpaceNdLayer::BatchToSpaceNdLayer(), and TEST_SUITE().

42 {
43  TensorShape inputShape = inputTensorInfo.GetShape();
44 
45  ARMNN_ASSERT_MSG(inputShape.GetNumDimensions() == 4, "Expected Input with 4 Dimensions");
46 
47  TensorShape outputShape = outputTensorInfo.GetShape();
48 
49  ARMNN_ASSERT_MSG(outputShape.GetNumDimensions() == 4, "Expected Output with 4 Dimensions");
50 
51  const unsigned int inputBatchSize = inputShape[0];
52  const unsigned int channels = inputShape[dataLayout.GetChannelsIndex()];
53 
54  const unsigned int outputBatchSize = outputShape[0];
55  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
56  const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
57 
58  ARMNN_ASSERT_MSG(blockShape.size() > 0, "BlockShape must contain 1 or more entries");
59 
60  const unsigned int blockShapeHeight = blockShape[0];
61  const unsigned int blockShapeWidth = blockShape[1];
62 
63  ARMNN_ASSERT_MSG(cropsData.size() > 0, "Crops must contain 1 or more entries");
64 
65  const unsigned int cropsTop = cropsData[0].first;
66  const unsigned int cropsLeft = cropsData[1].first;
67 
68  for (unsigned int inBatch = 0; inBatch < inputBatchSize; ++inBatch)
69  {
70  const unsigned int outBatch = inBatch % outputBatchSize;
71  const unsigned int spatialOffset = inBatch / outputBatchSize;
72 
73  for (unsigned int inH = 0; inH < inputTensorInfo.GetShape()[dataLayout.GetHeightIndex()]; ++inH) {
74  const unsigned int outH = inH * blockShapeHeight + spatialOffset / blockShapeWidth - cropsTop;
75 
76  if (outH >= outputHeight)
77  {
78  continue;
79  }
80 
81  for (unsigned int inW = 0; inW < inputTensorInfo.GetShape()[dataLayout.GetWidthIndex()]; ++inW) {
82  const unsigned int outW = inW * blockShapeWidth + spatialOffset % blockShapeWidth - cropsLeft;
83 
84  if (outW >= outputWidth)
85  {
86  continue;
87  }
88 
89  for (unsigned int c = 0; c < channels; c++)
90  {
91  unsigned int outOffset = Offset(outputShape, outBatch, outH, outW, c, dataLayout);
92  unsigned int inOffset = Offset(inputShape, inBatch, inH, inW, c, dataLayout);
93 
94  outputEncoder[outOffset];
95  inputDecoder[inOffset];
96  outputEncoder.Set(inputDecoder.Get());
97  }
98  }
99  }
100  }
101 }
unsigned int GetWidthIndex() const
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
unsigned int Offset(const TensorShape &shape, unsigned int batch, unsigned int height, unsigned int width, unsigned int channels, const DataLayoutIndexed &dataLayout)
virtual void Set(IType right)=0
unsigned int GetHeightIndex() const
virtual IType Get() const =0
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
unsigned int GetChannelsIndex() const
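
The loop body maps each input coordinate (inBatch, inH, inW) to an output coordinate using the block shape and crops. A small standalone sketch of that index arithmetic, assuming blockShape = {2, 2}, no crops and an input batch of 4 folding into an output batch of 1:

#include <cstdio>

int main()
{
    // Assumed, illustrative parameters.
    const unsigned int blockShapeHeight = 2;
    const unsigned int blockShapeWidth  = 2;
    const unsigned int cropsTop  = 0;
    const unsigned int cropsLeft = 0;
    const unsigned int outputBatchSize = 1;   // inputBatchSize (4) / (2 * 2)

    // One example input coordinate.
    const unsigned int inBatch = 3, inH = 0, inW = 0;

    // Same mapping as the loop body above.
    const unsigned int outBatch      = inBatch % outputBatchSize;                                    // 0
    const unsigned int spatialOffset = inBatch / outputBatchSize;                                    // 3
    const unsigned int outH = inH * blockShapeHeight + spatialOffset / blockShapeWidth - cropsTop;   // 1
    const unsigned int outW = inW * blockShapeWidth  + spatialOffset % blockShapeWidth - cropsLeft;  // 1

    std::printf("(b=%u, h=%u, w=%u) -> (b=%u, h=%u, w=%u)\n", inBatch, inH, inW, outBatch, outH, outW);
    return 0;
}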

◆ CalcLevel()

int armnn::CalcLevel ( const Event *  eventPtr )

Definition at line 246 of file Profiling.cpp.

References Event::GetParentEvent().

Referenced by ProfilerImpl::AnalyzeEventsAndWriteResults(), and ProfilerImpl::PopulateParent().

247 {
248  int level = 0;
249  while (eventPtr != nullptr)
250  {
251  eventPtr = eventPtr->GetParentEvent();
252  level++;
253  }
254  return level;
255 }

◆ CalculateEdgeStrategy()

EdgeStrategy armnn::CalculateEdgeStrategy ( BackendsMap &  backends,
ITensorHandleFactory::FactoryId  srcFactoryId,
const Layer &  layer,
const Layer &  connectedLayer,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled 
)

Definition at line 1512 of file Network.cpp.

References ARMNN_ASSERT_MSG, CopyToTarget, DirectCompatibility, ExportToTarget, FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, PaddingRequired, ITensorHandleFactory::SupportsMapUnmap(), and Undefined.

Referenced by SelectTensorHandleStrategy().

1518 {
1519  auto toBackend = backends.find(connectedLayer.GetBackendId());
1520  ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1521 
1522  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1523 
1524  // Legacy API check for backward compatibility
1525  if (srcFactoryId == ITensorHandleFactory::LegacyFactoryId || dstPrefs.empty())
1526  {
1527  if (layer.GetBackendId() != connectedLayer.GetBackendId())
1528  {
1529  return EdgeStrategy::CopyToTarget;
1530  }
1531  else
1532  {
1533  return EdgeStrategy::DirectCompatibility;
1534  }
1535  }
1536 
1537  // TensorHandleFactory API present, so perform more sophisticated strategies.
1538  // Dst Output layers don't require copy because they use import or map/unmap
1539  if (connectedLayer.GetType() == LayerType::Output)
1540  {
1541  return EdgeStrategy::DirectCompatibility;
1542  }
1543 
1544  // Search for direct match in prefs
1545  for (auto&& pref : dstPrefs)
1546  {
1547  if (pref == srcFactoryId)
1548  {
1549  return EdgeStrategy::DirectCompatibility;
1550  }
1551  }
1552 
1553  // Search for export/import options
1554  ITensorHandleFactory* srcFactory = registry.GetFactory(srcFactoryId);
1555  if (srcFactory->GetExportFlags() != 0 && importEnabled)
1556  {
1557  for (auto&& pref : dstPrefs)
1558  {
1559  ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1560 
1561  // Handles cases when a destPref is not listed in TensorHandleFactoryRegistry
1562  if (!dstFactory) {
1563  continue;
1564  }
1565  if ((dstFactory->GetImportFlags() & srcFactory->GetExportFlags()) != 0)
1566  {
1567  auto srcCapability = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::PaddingRequired);
1568  auto dstCapability = dstFactory->GetCapabilities(&connectedLayer,
1569  &connectedLayer,
1570  CapabilityClass::PaddingRequired);
1571  auto srcFallback = srcFactory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1572  auto dstFallback = dstFactory->GetCapabilities(&connectedLayer,
1573  &connectedLayer,
1574  CapabilityClass::FallbackImportDisabled);
1575  // Do not require memory copy if the source and destination do not require padding.
1576  if (srcCapability.empty() && dstCapability.empty() && srcFallback.empty() && dstFallback.empty())
1577  {
1578  return EdgeStrategy::ExportToTarget;
1579  }
1580  }
1581  }
1582  }
1583 
1584  // Search for copy options via map/unmap
1585  if (srcFactory->SupportsMapUnmap())
1586  {
1587  for (auto&& pref : dstPrefs)
1588  {
1589  ITensorHandleFactory* dstFactory = registry.GetFactory(pref);
1590  if (dstFactory && dstFactory->SupportsMapUnmap())
1591  {
1592  return EdgeStrategy::CopyToTarget;
1593  }
1594  }
1595  }
1596 
1597  return EdgeStrategy::Undefined;
1598 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ CalculateGatherNdKeyIndices()

std::map< std::string, unsigned int > CalculateGatherNdKeyIndices ( TensorInfo  inputInfo0,
TensorInfo  inputInfo1 
)

Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1)

Parameters
inputInfo0 - TensorInfo of the corresponding input tensor: params
inputInfo1 - TensorInfo of the corresponding input tensor: indices
Returns
- A map with names and values for N, ND, K, W, C

Definition at line 300 of file WorkloadUtils.cpp.

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

Referenced by ClGatherNdWorkload::ClGatherNdWorkload(), ClGatherNdWorkloadValidate(), RefGatherNdWorkload::ExecuteAsync(), GatherTensorHandlePairs(), NeonGatherNdWorkload::NeonGatherNdWorkload(), and NeonGatherNdWorkloadValidate().

301 {
302  std::vector<unsigned int> paramsShape;
303  for (unsigned int i = 0; i < inputInfo0.GetNumDimensions(); ++i)
304  {
305  paramsShape.push_back(inputInfo0.GetShape()[i]);
306  }
307 
308  std::vector<unsigned int> indicesShape;
309  for (unsigned int i = 0; i < inputInfo1.GetNumDimensions(); ++i)
310  {
311  indicesShape.push_back(inputInfo1.GetShape()[i]);
312  }
313 
314  std::map<std::string, unsigned int> keyIndices;
315 
316  // N: number of batches
317  keyIndices["N"] = 1;
318 
319  // ND: number of dimensions that are sliced from params
320  keyIndices["ND"] = indicesShape.back();
321 
322  // W: number of indices in each batch (all but the last dimension)
323  keyIndices["W"] =
324  static_cast<unsigned int>(std::accumulate(std::begin(indicesShape),
325  std::end(indicesShape) - 1,
326  1,
327  std::multiplies<>() ));
328  // K: range of each index
329  keyIndices["K"] =
330  static_cast<unsigned int>(std::accumulate(std::begin(paramsShape),
331  std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
332  1,
333  std::multiplies<>() ));
334  // C: number of channels for each index
335  keyIndices["C"] =
336  static_cast<unsigned int>(std::accumulate(std::begin(paramsShape) + static_cast<int>(keyIndices["ND"]),
337  std::end(paramsShape),
338  1,
339  std::multiplies<>() ));
340 
341  return keyIndices;
342 }
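
As a hedged usage sketch: for a params tensor of shape [5, 4, 3] and an indices tensor of shape [2, 2], the returned map works out to N = 1, ND = 2, W = 2, K = 5 * 4 = 20 and C = 3. The forward declaration below stands in for the backend WorkloadUtils header, whose include path is not shown on this page:

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <cstdio>
#include <map>
#include <string>

namespace armnn
{
// Declared in the backends' WorkloadUtils header, as documented above.
std::map<std::string, unsigned int> CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1);
}

int main()
{
    using namespace armnn;

    TensorInfo params(TensorShape({ 5, 4, 3 }), DataType::Float32);   // "params" input
    TensorInfo indices(TensorShape({ 2, 2 }), DataType::Signed32);    // "indices" input

    std::map<std::string, unsigned int> keys = CalculateGatherNdKeyIndices(params, indices);

    // Expected: N=1 ND=2 W=2 K=20 C=3
    std::printf("N=%u ND=%u W=%u K=%u C=%u\n", keys["N"], keys["ND"], keys["W"], keys["K"], keys["C"]);
    return 0;
}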

◆ CalculateSlotOption()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOption ( BackendsMap &  backends,
OutputSlot &  outputSlot,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled 
)

Definition at line 1362 of file Network.cpp.

References ARMNN_ASSERT_MSG, FallbackImportDisabled, Layer::GetBackendId(), ITensorHandleFactory::GetCapabilities(), OutputSlot::GetConnections(), ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), IBackendInternal::GetHandleFactoryPreferences(), Layer::GetInputSlots(), OutputSlot::GetOwningLayer(), Layer::GetType(), ITensorHandleFactory::LegacyFactoryId, Output, RequiresCopy(), and ITensorHandleFactory::SupportsMapUnmap().

Referenced by SelectTensorHandleStrategy().

1366 {
1367  // First ensure the from backends can support the TensorHandeAPI
1368  Layer& layer = outputSlot.GetOwningLayer();
1369  auto frmBackend = backends.find(layer.GetBackendId());
1370  if (frmBackend == backends.end() ||
1371  !frmBackend->second->SupportsTensorAllocatorAPI())
1372  {
1373  return ITensorHandleFactory::LegacyFactoryId;
1374  }
1375 
1376  bool outputConnection = false;
1377  for (auto&& connection : outputSlot.GetConnections())
1378  {
1379  const Layer& connectedLayer = connection->GetOwningLayer();
1380  if (connectedLayer.GetType() == LayerType::Output)
1381  {
1382  outputConnection = true;
1383  }
1384  }
1385 
1386  IBackendInternal* srcBackend = frmBackend->second.get();
1387  auto srcPrefs = srcBackend->GetHandleFactoryPreferences();
1388 
1389  // Initialize the scores
1390  std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1391  for (auto&& pref : srcPrefs)
1392  {
1393  if (importEnabled)
1394  {
1395  ITensorHandleFactory* factory = registry.GetFactory(pref);
1396  if (outputConnection)
1397  {
1398  // Check if this is fallback case
1399  bool fallbackConnection = false;
1400  for (auto&& inputSlot : layer.GetInputSlots())
1401  {
1402  if (inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetBackendId() != layer.GetBackendId())
1403  {
1404  fallbackConnection = true;
1405  }
1406  }
1407  if (fallbackConnection)
1408  {
1409  auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1410  // Cannot use factory import if fallback import is not supported.
1411  if (!factoryCap.empty())
1412  {
1413  continue;
1414  }
1415  }
1416  else if (factory->GetExportFlags() == 0)
1417  {
1418  continue;
1419  }
1420  }
1421  if (!outputConnection)
1422  {
1423  auto factoryCap = factory->GetCapabilities(&layer, &layer, CapabilityClass::FallbackImportDisabled);
1424  // Cannot use factory import if fallback import is not supported.
1425  if (!factoryCap.empty())
1426  {
1427  continue;
1428  }
1429  }
1430 
1431  }
1432  else
1433  {
1434  // Only consider factories that support map/unmap
1435  ITensorHandleFactory* factory = registry.GetFactory(pref);
1436  if (!factory->SupportsMapUnmap())
1437  {
1438  // The current tensor handle factory does not support the map/unmap strategy, move to the next one
1439  continue;
1440  }
1441  }
1442 
1443 
1444  auto it = factoryScores.find(pref);
1445  if (it == factoryScores.end())
1446  {
1447  // Add new score to the table
1448  factoryScores[pref] = 0;
1449  }
1450  }
1451 
1452  // Score each handle factory based on how many times it requires copies on the slot connections
1453  for (auto&& connection : outputSlot.GetConnections())
1454  {
1455  const Layer& connectedLayer = connection->GetOwningLayer();
1456 
1457  auto toBackend = backends.find(connectedLayer.GetBackendId());
1458  ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1459 
1460  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1461  for (auto&& src : srcPrefs)
1462  {
1463  if (factoryScores.find(src) == factoryScores.end()) // Don't consider excluded factories
1464  {
1465  continue;
1466  }
1467 
1468  for (auto&& dst : dstPrefs)
1469  {
1470  if (RequiresCopy(src, dst, registry))
1471  {
1472  // Copy avoided, increase the score
1473  factoryScores[src]++;
1474  break;
1475  }
1476  }
1477  }
1478  }
1479 
1480  // Find the lowest score
1481  int minScore = std::numeric_limits<int>::max();
1482  for (auto it : factoryScores)
1483  {
1484  minScore = std::min(minScore, it.second);
1485  }
1486 
1487  // Collect factories matching the best(lowest) score
1488  std::vector<ITensorHandleFactory::FactoryId> optimalFactories;
1489  for (auto it : factoryScores)
1490  {
1491  if (it.second == minScore)
1492  {
1493  optimalFactories.push_back(it.first);
1494  }
1495  }
1496 
1497  // For all compatible Factories matching the best score, find the preferred one for the current layer.
1498  for (auto&& srcPref : srcPrefs)
1499  {
1500  for (auto&& comp : optimalFactories)
1501  {
1502  if (comp == srcPref)
1503  {
1504  return comp;
1505  }
1506  }
1507  }
1508 
1509  return ITensorHandleFactory::LegacyFactoryId;
1510 }
bool RequiresCopy(ITensorHandleFactory::FactoryId src, ITensorHandleFactory::FactoryId dst, TensorHandleFactoryRegistry &registry)
Definition: Network.cpp:1247
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ CalculateSlotOptionForInput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForInput ( BackendsMap &  backends,
OutputSlot &  slot,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled 
)

Definition at line 1267 of file Network.cpp.

References ARMNN_ASSERT, ARMNN_ASSERT_MSG, Layer::GetBackendId(), OutputSlot::GetConnections(), TensorHandleFactoryRegistry::GetFactory(), ITensorHandleFactory::GetImportFlags(), OutputSlot::GetOwningLayer(), Layer::GetType(), Input, ITensorHandleFactory::LegacyFactoryId, and ITensorHandleFactory::SupportsMapUnmap().

Referenced by SelectTensorHandleStrategy().

1271 {
1272  Layer& layer = slot.GetOwningLayer();
1273  ARMNN_ASSERT(layer.GetType() == LayerType::Input);
1274 
1275  // Explicitly select the tensorhandle factory for InputLayer because the rules for it are slightly different. It
1276  // doesn't matter which backend it is assigned to because they all use the same implementation, which
1277  // requires Map/Unmap support. This means that, so long as the handle type supports map/unmap semantics, we can
1278  // select a factory with maximum compatibility with the layers connected to the InputLayer.
1279 
1280  // First ensure the from backends can support the TensorHandeAPI
1281  auto frmBackend = backends.find(layer.GetBackendId());
1282  if (frmBackend == backends.end() ||
1283  !frmBackend->second->SupportsTensorAllocatorAPI())
1284  {
1285  return ITensorHandleFactory::LegacyFactoryId;
1286  }
1287 
1288  // Go through all connections to the output slot and determine the TensorHandleFactory which results in the
1289  // fewest copies.
1290  std::map<ITensorHandleFactory::FactoryId, int> factoryScores;
1291  int topScore = 0;
1292  ITensorHandleFactory::FactoryId topChoice = ITensorHandleFactory::LegacyFactoryId;
1293 
1294  for (auto&& connection : slot.GetConnections())
1295  {
1296 
1297  const Layer& connectedLayer = connection->GetOwningLayer();
1298 
1299  auto toBackend = backends.find(connectedLayer.GetBackendId());
1300  ARMNN_ASSERT_MSG(toBackend != backends.end(), "Backend id not found for the connected layer");
1301 
1302  if (!toBackend->second.get()->SupportsTensorAllocatorAPI())
1303  {
1304  // The destination backend does not support the tensor allocator API, move to the next one
1305  continue;
1306  }
1307 
1308  auto dstPrefs = toBackend->second.get()->GetHandleFactoryPreferences();
1309  for (auto&& dst : dstPrefs)
1310  {
1311  // Input layers use the mem copy workload or import, so the selected factory must
1312  // support either the map/unmap API or Import API
1313  ITensorHandleFactory* factory = registry.GetFactory(dst);
1314  if (importEnabled && factory->GetImportFlags() == 0)
1315  {
1316  continue;
1317  }
1318  else if (!importEnabled && !factory->SupportsMapUnmap())
1319  {
1320  continue;
1321  }
1322 
1323  auto it = factoryScores.find(dst);
1324  if (it == factoryScores.end())
1325  {
1326  // Add new score to the table
1327  factoryScores[dst] = 0;
1328  if (topChoice == ITensorHandleFactory::LegacyFactoryId)
1329  {
1330  topChoice = dst;
1331  }
1332  }
1333  else
1334  {
1335  // Increase the score
1336  factoryScores[dst]++;
1337 
1338  // Track the best option
1339  if (factoryScores[dst] > topScore)
1340  {
1341  topScore = factoryScores[dst];
1342  topChoice = dst;
1343  }
1344  }
1345  }
1346  }
1347 
1348  return topChoice;
1349 }
ITensorHandleFactory::FactoryId FactoryId
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ CalculateSlotOptionForOutput()

ITensorHandleFactory::FactoryId armnn::CalculateSlotOptionForOutput ( BackendsMap &  backends,
OutputSlot &  slot,
TensorHandleFactoryRegistry &  registry 
)

Definition at line 1352 of file Network.cpp.

References ITensorHandleFactory::DeferredFactoryId, and IgnoreUnused().

Referenced by SelectTensorHandleStrategy().

1355 {
1356  IgnoreUnused(backends, slot, registry);
1357  return ITensorHandleFactory::DeferredFactoryId;
1358 }
void IgnoreUnused(Ts &&...)

◆ ChainReduceLayers()

std::vector<IConnectableLayer*> armnn::ChainReduceLayers ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
ReduceDescriptor &  desc 
)

Definition at line 298 of file ArmComputeSubgraphUtils.hpp.

References ARMNN_ASSERT, ComputeReductionTensorShape(), OptimizationViews::GetINetwork(), Layer::GetInputSlot(), Layer::GetOutputSlot(), ReduceDescriptor::m_KeepDims, ReduceDescriptor::m_vAxis, and OutputSlot::SetTensorInfo().

301 {
302  // Vector of new chained layers, used for substitution.
303  std::vector<IConnectableLayer*> layers;
304 
305  // Vector of axes so each layer is reshaped correctly.
306  std::vector<uint32_t> axes;
307  unsigned int recalulatedAxis = 0;
308 
309  for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
310  {
311  // Get TensorInfo from base layer and reduce shape using axis.
312  TensorInfo layerInfo = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
313 
314  axes.emplace_back(desc.m_vAxis[i]);
315 
316  const TensorInfo& reducedTensorInfo = ComputeReductionTensorShape(layerInfo,
317  axes,
318  desc.m_KeepDims);
319 
320  // Create a vector for the single axis to be assigned to the descriptor.
321  // Update axis if keepDims is set reduce layers correctly.
322  std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i] - recalulatedAxis);
323 
324  // Create a descriptor and assign single axis.
325  ReduceDescriptor newReduceDescriptor = baseLayer->GetParameters();
326  newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end());
327 
328  // Add new layer to graph.
329  std::string layerName = "reduce_layer_" + std::to_string(i);
330 
331  Layer* replacementLayer = PolymorphicDowncast<Layer*>(
332  optimizationViews.GetINetwork()->AddReduceLayer(newReduceDescriptor,
333  layerName.c_str()));
334 
335  // Connect previous layer with new layer.
336  // The first and last layer will be connected when the subgraph is replaced.
337  if (!layers.empty())
338  {
339  layers[i - 1]->GetOutputSlot(0).Connect(replacementLayer->GetInputSlot(0));
340  }
341 
342  // Set updated tensorInfo for new layer.
343  replacementLayer->GetOutputSlot(0).SetTensorInfo(reducedTensorInfo);
344 
345  if (!desc.m_KeepDims)
346  {
347  recalulatedAxis++;
348  }
349 
350  layers.emplace_back(replacementLayer);
351  }
352 
353  // Check if the TensorInfo from the last layer equals the inferred output from the original layer.
354  ARMNN_ASSERT(baseLayer->GetOutputSlot(0).GetTensorInfo() ==
355  PolymorphicDowncast<Layer*>(layers.back())->GetOutputSlot().GetTensorInfo());
356 
357  return layers;
358 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
const TensorInfo ComputeReductionTensorShape(const armnn::TensorInfo &input, const std::vector< uint32_t > &vAxis, const bool keepDims)
Function to compute the output tensor shape based on the axes and if keepDims is set.
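
The recalulatedAxis bookkeeping above compensates for dimensions that vanish as each single-axis reduce is chained when keepDims is false. A small standalone sketch of just that axis arithmetic, assuming a 4D input reduced over axes {1, 3}:

#include <cstdio>
#include <vector>

int main()
{
    // Reduce a 4D tensor over axes 1 and 3, dropping the reduced dimensions.
    const std::vector<unsigned int> vAxis = { 1, 3 };
    const bool keepDims = false;

    unsigned int recalculatedAxis = 0;
    for (unsigned int i = 0; i != vAxis.size(); ++i)
    {
        // Each earlier reduce removed one dimension, so shift the axis down.
        const unsigned int singleAxis = vAxis[i] - recalculatedAxis;
        std::printf("reduce_layer_%u reduces axis %u\n", i, singleAxis);   // prints axis 1, then axis 2

        if (!keepDims)
        {
            recalculatedAxis++;
        }
    }
    return 0;
}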

◆ CheckFlag()

bool armnn::CheckFlag ( MemorySourceFlags  flags,
MemorySource  source 
)
inline

Definition at line 41 of file MemorySources.hpp.

Referenced by LoadedNetwork::FreeWorkingMemory(), LoadedNetwork::ImportInputs(), and LoadedNetwork::ImportOutputs().

42 {
43  return (static_cast<MemorySourceFlags>(source) & flags) != 0;
44 }
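
A minimal usage sketch: build a MemorySourceFlags bitmask from two sources and test individual bits with CheckFlag. The armnn/MemorySources.hpp include path is an assumption:

#include <armnn/MemorySources.hpp>
#include <cstdio>

int main()
{
    using namespace armnn;

    // Advertise support for Malloc and DmaBuf imports.
    MemorySourceFlags flags = static_cast<MemorySourceFlags>(MemorySource::Malloc) |
                              static_cast<MemorySourceFlags>(MemorySource::DmaBuf);

    std::printf("Malloc:  %d\n", CheckFlag(flags, MemorySource::Malloc));    // 1
    std::printf("DmaBuf:  %d\n", CheckFlag(flags, MemorySource::DmaBuf));    // 1
    std::printf("Gralloc: %d\n", CheckFlag(flags, MemorySource::Gralloc));   // 0
    return 0;
}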

◆ CheckLayerBindingId()

void armnn::CheckLayerBindingId ( LayerBindingId  visitorId,
LayerBindingId  id 
)

Definition at line 13 of file TestInputOutputLayerVisitor.hpp.

Referenced by TestInputLayerVisitor::ExecuteStrategy(), and TestOutputLayerVisitor::ExecuteStrategy().

14 {
15  CHECK_EQ(visitorId, id);
16 }

◆ CheckScaleSetOnQuantizedType()

bool armnn::CheckScaleSetOnQuantizedType ( Layer *  layer,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 595 of file Network.cpp.

References ARMNN_LOG, TensorInfo::GetDataType(), GetLayerTypeAsCString(), Layer::GetNameStr(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), OutputSlot::GetTensorInfo(), Layer::GetType(), info, QAsymmU8, ReportError(), TensorInfo::SetQuantizationOffset(), TensorInfo::SetQuantizationScale(), OutputSlot::SetTensorInfo(), Softmax, and warning.

Referenced by AssignBackendsIConnectable().

596 {
597  bool noErrors = true;
598  unsigned int numOutputs = layer->GetNumOutputSlots();
599  for (unsigned int i = 0; i < numOutputs; i++) {
600  OutputSlot& outputSlot = layer->GetOutputSlot(i);
601  TensorInfo info = outputSlot.GetTensorInfo();
602  if (DataType::QAsymmU8 == info.GetDataType()) {
603  if (0.f == info.GetQuantizationScale()) {
604  noErrors = false;
605  std::stringstream ss;
606  ss << "output " << i << " of layer " << GetLayerTypeAsCString(layer->GetType())
607  << " (" << layer->GetNameStr() << ") is of type"
608  << " Quantized 8 bit but its scale parameter has not been set";
609  ReportError(ss.str(), errMessages);
610  }
611  // Softmax under QuantisedAsymm8 must always be scale (1.0f/256.0f) and offset 0
612  if ((info.GetQuantizationScale() != (1.0f / 256.0f) ||
613  info.GetQuantizationOffset() != 0) &&
614  layer->GetType() == armnn::LayerType::Softmax)
615  {
616  std::stringstream ss;
617  ss << "Quantization parameters for Softmax layer (Scale: " <<
618  info.GetQuantizationScale() << " and Offset: " << info.GetQuantizationOffset() <<
619  ") are incorrect and have been updated to Scale: 0.00390625 and Offset: 0";
620  ARMNN_LOG(warning) << ss.str();
621  info.SetQuantizationScale((1.0f /256.0f));
622  info.SetQuantizationOffset(0);
623  outputSlot.SetTensorInfo(info);
624  }
625  }
626  }
627  return noErrors;
628 }
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205
const char * GetLayerTypeAsCString(LayerType type)

◆ CheckSupportRule()

bool armnn::CheckSupportRule ( F  rule,
Optional< std::string &>  reasonIfUnsupported,
const char *  reason 
)

Definition at line 38 of file LayerSupportRules.hpp.

References OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by RefLayerSupport::IsActivationSupported(), RefLayerSupport::IsAdditionSupported(), RefLayerSupport::IsArgMinMaxSupported(), RefLayerSupport::IsBatchNormalizationSupported(), RefLayerSupport::IsBatchToSpaceNdSupported(), RefLayerSupport::IsCastSupported(), RefLayerSupport::IsChannelShuffleSupported(), RefLayerSupport::IsComparisonSupported(), RefLayerSupport::IsConcatSupported(), RefLayerSupport::IsConstantSupported(), RefLayerSupport::IsConvertBf16ToFp32Supported(), RefLayerSupport::IsConvertFp32ToBf16Supported(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDebugSupported(), RefLayerSupport::IsDepthToSpaceSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), RefLayerSupport::IsDequantizeSupported(), RefLayerSupport::IsDetectionPostProcessSupported(), RefLayerSupport::IsDivisionSupported(), RefLayerSupport::IsElementwiseUnarySupported(), RefLayerSupport::IsFakeQuantizationSupported(), RefLayerSupport::IsFillSupported(), RefLayerSupport::IsFloorSupported(), RefLayerSupport::IsFullyConnectedSupported(), RefLayerSupport::IsGatherNdSupported(), RefLayerSupport::IsGatherSupported(), RefLayerSupport::IsInstanceNormalizationSupported(), RefLayerSupport::IsL2NormalizationSupported(), RefLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogSoftmaxSupported(), RefLayerSupport::IsLstmSupported(), RefLayerSupport::IsMaximumSupported(), RefLayerSupport::IsMeanSupported(), RefLayerSupport::IsMemCopySupported(), RefLayerSupport::IsMinimumSupported(), RefLayerSupport::IsMultiplicationSupported(), RefLayerSupport::IsNormalizationSupported(), RefLayerSupport::IsPadSupported(), RefLayerSupport::IsPermuteSupported(), RefLayerSupport::IsPooling2dSupported(), RefLayerSupport::IsPooling3dSupported(), RefLayerSupport::IsPreluSupported(), RefLayerSupport::IsQuantizeSupported(), RefLayerSupport::IsRankSupported(), RefLayerSupport::IsReduceSupported(), RefLayerSupport::IsReshapeSupported(), RefLayerSupport::IsResizeSupported(), RefLayerSupport::IsShapeSupported(), RefLayerSupport::IsSliceSupported(), RefLayerSupport::IsSoftmaxSupported(), RefLayerSupport::IsSpaceToBatchNdSupported(), RefLayerSupport::IsSpaceToDepthSupported(), RefLayerSupport::IsSplitterSupported(), RefLayerSupport::IsStackSupported(), RefLayerSupport::IsStridedSliceSupported(), RefLayerSupport::IsSubtractionSupported(), RefLayerSupport::IsTransposeConvolution2dSupported(), RefLayerSupport::IsTransposeSupported(), and RefLayerSupport::IsUnidirectionalSequenceLstmSupported().

39 {
40  bool supported = rule();
41  if (!supported && reason)
42  {
43  reasonIfUnsupported.value() += std::string(reason) + "\n"; // Append the reason on a new line
44  }
45  return supported;
46 }
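
A hedged usage sketch. Real call sites pass the rule structs defined alongside this function (TypeAnyOf, ShapesAreSameRank, and so on), but any callable returning bool works; the definition is restated locally here because LayerSupportRules.hpp is a backend-internal header:

#include <armnn/Optional.hpp>
#include <iostream>
#include <string>

namespace armnn
{
// Local stand-in mirroring the definition documented above.
template <typename F>
bool CheckSupportRule(F rule, Optional<std::string&> reasonIfUnsupported, const char* reason)
{
    bool supported = rule();
    if (!supported && reason)
    {
        reasonIfUnsupported.value() += std::string(reason) + "\n";
    }
    return supported;
}
} // namespace armnn

int main()
{
    std::string reason;
    armnn::Optional<std::string&> reasonRef(reason);

    const bool supported = armnn::CheckSupportRule([] { return false; },
                                                   reasonRef,
                                                   "Reference addition: input types not matching");
    std::cout << "supported: " << supported << "\nreason: " << reason;
    return 0;
}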

◆ ClAbsWorkloadValidate()

arm_compute::Status ClAbsWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 19 of file ClAbsWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLAbsLayer::validate(&aclInput, &aclOutput);
25 }
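
A hedged usage sketch: query whether an elementwise Abs of a given shape would validate on GpuAcc before building the workload. The forward declaration stands in for the CL backend's ClAbsWorkload header, and building against Arm NN and the Compute Library is assumed:

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <arm_compute/core/Error.h>
#include <iostream>

namespace armnn
{
// Declared in the CL backend's ClAbsWorkload header, as documented above.
arm_compute::Status ClAbsWorkloadValidate(const TensorInfo& input, const TensorInfo& output);
}

int main()
{
    using namespace armnn;

    TensorInfo input(TensorShape({ 1, 2, 2, 3 }), DataType::Float32);
    TensorInfo output(TensorShape({ 1, 2, 2, 3 }), DataType::Float32);

    const arm_compute::Status status = ClAbsWorkloadValidate(input, output);
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        std::cout << "Abs not supported on GpuAcc: " << status.error_description() << "\n";
    }
    return 0;
}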

◆ ClActivationWorkloadValidate()

arm_compute::Status ClActivationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ActivationDescriptor &  descriptor 
)

Definition at line 17 of file ClActivationWorkload.cpp.

Referenced by ClLayerSupport::IsActivationSupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  const arm_compute::ActivationLayerInfo activationLayerInfo =
25  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
26 
27  return arm_compute::CLActivationLayer::validate(&aclInput,
28  &aclOutput,
29  activationLayerInfo);
30 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClAdditionValidate()

arm_compute::Status ClAdditionValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 45 of file ClAdditionWorkload.cpp.

Referenced by ClLayerSupport::IsAdditionSupported(), and ClBackend::OptimizeSubgraphView().

49 {
50  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
51  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
52  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
53 
54  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
55  activationDescriptor);
56 
57  const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info,
58  &aclInput1Info,
59  &aclOutputInfo,
60  g_AclConvertPolicy,
61  activationInfo);
62 
63  return aclStatus;
64 }
Status
enumeration
Definition: Types.hpp:42
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClArgMinMaxWorkloadValidate()

arm_compute::Status ClArgMinMaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ArgMinMaxDescriptor &  descriptor 
)

Definition at line 31 of file ClArgMinMaxWorkload.cpp.

Referenced by ClLayerSupport::IsArgMinMaxSupported().

34 {
35  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
36  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
37 
38  auto numDims = input.GetNumDimensions();
39  auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
40  int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
41 
42  if (descriptor.m_Function == ArgMinMaxFunction::Max)
43  {
44  return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
45  arm_compute::ReductionOperation::ARG_IDX_MAX);
46  }
47  else
48  {
49  return arm_compute::CLArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
50  arm_compute::ReductionOperation::ARG_IDX_MIN);
51  }
52 }
unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35

◆ ClBackendId()

constexpr const char* armnn::ClBackendId ( )

Definition at line 10 of file ClBackendId.hpp.

Referenced by ClBackend::GetIdStatic().

10 { return "GpuAcc"; }

◆ ClBatchNormalizationValidate()

arm_compute::Status ClBatchNormalizationValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TensorInfo &  mean,
const TensorInfo &  var,
const TensorInfo &  beta,
const TensorInfo &  gamma,
const BatchNormalizationDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file ClBatchNormalizationFloatWorkload.cpp.

Referenced by ClLayerSupport::IsBatchNormalizationSupported(), and ClBackend::OptimizeSubgraphView().

27 {
28  const arm_compute::TensorInfo aclInputInfo =
29  armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
30  const arm_compute::TensorInfo aclOutputInfo =
31  armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclMeanInfo =
33  armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
34  const arm_compute::TensorInfo aclVarInfo =
35  armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
36  const arm_compute::TensorInfo aclBetaInfo =
37  armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
38  const arm_compute::TensorInfo aclGammaInfo =
39  armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
40 
41  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
42  activationDescriptor);
43 
44  return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
45  &aclOutputInfo,
46  &aclMeanInfo,
47  &aclVarInfo,
48  &aclBetaInfo,
49  &aclGammaInfo,
50  descriptor.m_Eps,
51  activationInfo);
52 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClBatchToSpaceNdWorkloadValidate()

arm_compute::Status ClBatchToSpaceNdWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const BatchToSpaceNdDescriptor &  descriptor 
)

Definition at line 57 of file ClBatchToSpaceNdWorkload.cpp.

References BatchToSpaceNdDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsBatchToSpaceNdSupported().

60 {
61  DataLayout dataLayout = descriptor.m_DataLayout;
62  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
63 
64  // ArmNN blockShape is [H, W] Cl asks for W, H
65  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
66  int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
67 
68  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
69 
70  const arm_compute::Status aclStatus = arm_compute::CLBatchToSpaceLayer::validate(&aclInputInfo,
71  blockWidth,
72  blockHeight,
73  &aclOutputInfo);
74  return aclStatus;
75 }
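
A small caller-side sketch (illustrative; shapes and names are assumptions) showing the [H, W] ordering of m_BlockShape that the code above swaps into the W, H order CL expects:

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp and ClBatchToSpaceNdWorkload.hpp are included.
bool IsBatchToSpace2x2SupportedOnGpuAcc()
{
    // NHWC: a batch of 4 2x2 images rearranged into a single 4x4 image by a 2x2 block.
    armnn::TensorInfo input({ 4, 2, 2, 1 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 4, 4, 1 }, armnn::DataType::Float32);

    armnn::BatchToSpaceNdDescriptor descriptor;
    descriptor.m_BlockShape = { 2, 2 };               // [H, W] in Arm NN terms
    descriptor.m_Crops      = { { 0, 0 }, { 0, 0 } }; // no cropping
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    arm_compute::Status status = armnn::ClBatchToSpaceNdWorkloadValidate(input, output, descriptor);
    return status.error_code() == arm_compute::ErrorCode::OK;
}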

◆ ClCastValidate()

arm_compute::Status ClCastValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 20 of file ClCastWorkload.cpp.

Referenced by ClLayerSupport::IsCastSupported().

21 {
22  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  return arm_compute::CLCast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
26 }

◆ ClChannelShuffleValidate()

arm_compute::Status ClChannelShuffleValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ChannelShuffleDescriptor &  descriptor 
)

Definition at line 20 of file ClChannelShuffleWorkload.cpp.

Referenced by ClLayerSupport::IsChannelShuffleSupported().

23 {
24  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
26 
27  // In Arm NN and in NNAPI, channel shuffle implementation is datalayout agnostic and it has axis as a parameter.
28  // The channel shuffle Implementation for Neon is dependent on datalayout and does not have axis as a parameter,
29  // it only supports channel shuffle for 4D tensors in dimension C (1 or 3).
30  arm_compute::DataLayout aclDataLayout;
31  if (input.GetNumDimensions() == 4)
32  {
33  switch (descriptor.m_Axis)
34  {
35  case 1:
36  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
37  break;
38  case 3:
39  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
40  break;
41  default:
42  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
43  }
44  aclInputInfo.set_data_layout(aclDataLayout);
45  aclOutputInfo.set_data_layout(aclDataLayout);
46  return arm_compute::CLChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
47  }
48  else
49  {
50  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
51  }
52 }
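
A sketch of a call that satisfies the axis restriction above (values are illustrative; only a 4D tensor with the channel axis at position 1 or 3 validates successfully):

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp and ClChannelShuffleWorkload.hpp are included.
bool IsChannelShuffleSupportedOnGpuAcc()
{
    armnn::TensorInfo input({ 1, 8, 8, 12 }, armnn::DataType::Float32);
    armnn::TensorInfo output(input);

    armnn::ChannelShuffleDescriptor descriptor;
    descriptor.m_NumGroups = 3; // 12 channels shuffled as 3 groups of 4
    descriptor.m_Axis      = 3; // NHWC channel axis; axis 1 would imply NCHW

    arm_compute::Status status = armnn::ClChannelShuffleValidate(input, output, descriptor);
    return status.error_code() == arm_compute::ErrorCode::OK;
}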

◆ ClComparisonWorkloadValidate()

arm_compute::Status ClComparisonWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ComparisonDescriptor &  descriptor 
)

Definition at line 24 of file ClComparisonWorkload.cpp.

Referenced by ClLayerSupport::IsComparisonSupported().

28 {
29  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
30  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
31  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
32 
33  const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
34 
35  const arm_compute::Status aclStatus = arm_compute::CLComparison::validate(&aclInput0Info,
36  &aclInput1Info,
37  &aclOutputInfo,
38  comparisonOperation);
39  return aclStatus;
40 }

◆ ClConcatWorkloadValidate()

arm_compute::Status ClConcatWorkloadValidate ( const std::vector< const TensorInfo *> &  inputs,
const TensorInfo &  output,
const OriginsDescriptor &  descriptor 
)

Definition at line 27 of file ClConcatWorkload.cpp.

Referenced by ClLayerSupport::IsConcatSupported().

30 {
31  std::vector<arm_compute::TensorInfo> aclInputs;
32  for (const TensorInfo* input : inputs)
33  {
34  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
35  aclInputs.emplace_back(aclInputInfo);
36  }
37  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
38  std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
39  for (arm_compute::ITensorInfo& input : aclInputs)
40  {
41  aclInputPtrs.emplace_back(&input);
42  }
43 
44  size_t aclAxis = CalcAxis(descriptor);
45  return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
46 }

◆ ClConstantWorkloadValidate()

arm_compute::Status ClConstantWorkloadValidate ( const TensorInfo &  output)

Definition at line 18 of file ClConstantWorkload.cpp.

Referenced by ClLayerSupport::IsConstantSupported().

19 {
20  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  std::array<arm_compute::DataType,8> supportedTypes = {
23  arm_compute::DataType::F16,
24  arm_compute::DataType::F32,
25  arm_compute::DataType::QASYMM8,
26  arm_compute::DataType::QASYMM8_SIGNED,
27  arm_compute::DataType::QSYMM16,
28  arm_compute::DataType::QSYMM8,
29  arm_compute::DataType::QSYMM8_PER_CHANNEL,
30  arm_compute::DataType::S32
31  };
32  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
33 
34  if (it != end(supportedTypes))
35  {
36  return arm_compute::Status{};
37  }
38  else
39  {
40  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
41  }
42 }

◆ ClContextBufferHasIdentifier()

bool armnn::ClContextBufferHasIdentifier ( const void *  buf)
inline

Definition at line 152 of file ClContextSchema_generated.h.

References ClContextIdentifier().

152  {
153  return flatbuffers::BufferHasIdentifier(
154  buf, ClContextIdentifier());
155 }

◆ ClContextExtension()

const char* armnn::ClContextExtension ( )
inline

Definition at line 167 of file ClContextSchema_generated.h.

167  {
168  return "armnn";
169 }

◆ ClContextIdentifier()

const char* armnn::ClContextIdentifier ( )
inline

◆ ClConvertFp16ToFp32WorkloadValidate()

arm_compute::Status ClConvertFp16ToFp32WorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 44 of file ClConvertFp16ToFp32Workload.cpp.

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp16ToFp32Supported(), and ClConvertFp16ToFp32Workload::SupportsTensorHandleReplacement().

45 {
46  if (input.GetDataType() != DataType::Float16)
47  {
48  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float16");
49  }
50  if (output.GetDataType() != DataType::Float32)
51  {
52  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float32");
53  }
54 
55  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
56  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
59  &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
60 
61  return aclStatus;
62 }

◆ ClConvertFp32ToFp16WorkloadValidate()

arm_compute::Status ClConvertFp32ToFp16WorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 44 of file ClConvertFp32ToFp16Workload.cpp.

References Float16, Float32, and TensorInfo::GetDataType().

Referenced by ClLayerSupport::IsConvertFp32ToFp16Supported(), and ClConvertFp32ToFp16Workload::SupportsTensorHandleReplacement().

45 {
46  if (input.GetDataType() != DataType::Float32)
47  {
48  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float32");
49  }
50  if (output.GetDataType() != DataType::Float16)
51  {
52  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float16");
53  }
54 
55  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
56  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
59  &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
60 
61  return aclStatus;
62 }

◆ ClConvolution2dWorkloadValidate()

arm_compute::Status ClConvolution2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Convolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 23 of file ClConvolution2dWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by ClLayerSupport::IsConvolution2dSupported(), and ClBackend::OptimizeSubgraphView().

30 {
31  // The arm_compute::CLConvolutionLayer supports both const and non const
32  // weights. However, in the case of non const weights we'd have to call
33  // prepare or configure for each inference which we're not setup to do just yet.
34  if (!weights.IsConstant())
35  {
36  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
37  "ArmNN ClConvolution2dWorkload does not support non constant weights."};
38  }
39 
40  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
41  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
42  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
43  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
44 
45  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
46  descriptor.m_DilationY);
47 
48  arm_compute::TensorInfo aclBiasesInfo;
49  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
50 
51  if (descriptor.m_BiasEnabled)
52  {
53  ARMNN_ASSERT(biases.has_value());
54  // Same for bias as weights. We don't currently support non const.
55  if (!biases.value().IsConstant())
56  {
57  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
58  "ArmNN ClConvolution2dWorkload does not support non constant bias."};
59  }
60  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
61  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
62  optionalAclBiasesInfo = &aclBiasesInfo;
63  }
64 
65  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
66 
67  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
68  activationDescriptor);
69 
70  return arm_compute::CLConvolutionLayer::validate(&aclInputInfo,
71  &aclWeightsInfo,
72  optionalAclBiasesInfo,
73  &aclOutputInfo,
74  layerInfo,
75  arm_compute::WeightsInfo(),
76  aclDilationInfo,
77  activationInfo,
78  isFastMathEnabled);
79 }
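
A caller-side sketch highlighting the constant-weights requirement enforced above. It is illustrative only: the shapes are assumptions, and the trailing nullptr relies on the fused-activation parameter being optional.

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp, armnn/Optional.hpp and ClConvolution2dWorkload.hpp are included.
bool IsConv3x3SupportedOnGpuAcc()
{
    armnn::TensorInfo input({ 1, 32, 32, 8 },  armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 32, 32, 16 }, armnn::DataType::Float32);
    armnn::TensorInfo weights({ 16, 3, 3, 8 }, armnn::DataType::Float32); // [O, H, W, I] for NHWC
    armnn::TensorInfo bias({ 16 }, armnn::DataType::Float32);
    weights.SetConstant(true); // non-constant weights are rejected by the validation above
    bias.SetConstant(true);

    armnn::Convolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;  descriptor.m_StrideY   = 1;
    descriptor.m_PadLeft = 1;  descriptor.m_PadRight  = 1;
    descriptor.m_PadTop  = 1;  descriptor.m_PadBottom = 1;
    descriptor.m_BiasEnabled = true;
    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;

    arm_compute::Status status = armnn::ClConvolution2dWorkloadValidate(
        input, output, descriptor, weights, armnn::Optional<armnn::TensorInfo>(bias),
        false /* isFastMathEnabled */, nullptr /* no fused activation */);
    return status.error_code() == arm_compute::ErrorCode::OK;
}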

◆ ClConvolution3dWorkloadValidate()

arm_compute::Status ClConvolution3dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Convolution3dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 23 of file ClConvolution3dWorkload.cpp.

Referenced by ClLayerSupport::IsConvolution3dSupported().

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
33 
34  arm_compute::TensorInfo aclBiasesInfo;
35  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
36  if (descriptor.m_BiasEnabled)
37  {
38  ARMNN_ASSERT(biases.has_value());
39  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
40  optionalAclBiasesInfo = &aclBiasesInfo;
41  }
42 
43  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
44 
45  const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
46  isFastMathEnabled,
47  activationDescriptor);
48 
49  return arm_compute::CLConv3D::validate(&aclInputInfo,
50  &aclWeightsInfo,
51  optionalAclBiasesInfo,
52  &aclOutputInfo,
53  aclConv3DInfo);
54 }

◆ ClDepthToSpaceWorkloadValidate()

arm_compute::Status ClDepthToSpaceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const DepthToSpaceDescriptor &  descriptor 
)

Definition at line 22 of file ClDepthToSpaceWorkload.cpp.

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsDepthToSpaceSupported().

25 {
26  DataLayout dataLayout = descriptor.m_DataLayout;
27  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
28 
29  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
30 
31  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
32 
33  const arm_compute::Status aclStatus = arm_compute::CLDepthToSpaceLayer::validate(&aclInputInfo,
34  &aclOutputInfo,
35  blockSize);
36  return aclStatus;
37 }

◆ ClDepthwiseConvolutionWorkloadValidate()

arm_compute::Status ClDepthwiseConvolutionWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const DepthwiseConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 26 of file ClDepthwiseConvolutionWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by ClLayerSupport::IsDepthwiseConvolutionSupported(), ClLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and ClBackend::OptimizeSubgraphView().

32 {
33  // The CL implemented workload does support both const and non const
34  // weights. However, in the case of non const weights we'd have to call
35  // prepare or configure for each inference which we're not setup to do just yet.
36  if (!weights.IsConstant())
37  {
38  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
39  "ArmNN ClDepthwiseConv2dWorkload does not support non constant weights."};
40  }
41 
42  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
43  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
44 
45  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
46  //
47  // ACL format for weights for depthwise is:
48  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
49  // - [1, C, H, W] for [N, C, H, W] input/output layout
50  //
51  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
52  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
53  // so we do the permute here for the TensorInfo weights.
54  unsigned int aclDepthMultiplier;
55  TensorInfo weightsPermuted;
56  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
57 
58  // Convert the weights into the compute library format
59  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
60  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
61 
62  arm_compute::TensorInfo aclBiasesInfo;
63  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
64  if (descriptor.m_BiasEnabled)
65  {
66  ARMNN_ASSERT(biases.has_value());
67  // Same for bias as weights. We don't currently support non const.
68  if (!biases.value().IsConstant())
69  {
70  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
71  "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
72  }
73  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
74  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
75  optionalAclBiasesInfo = &aclBiasesInfo;
76  }
77 
78  const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
79  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
80  descriptor.m_DilationX,
81  descriptor.m_DilationY);
82 
83  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
84  activationDescriptor);
85 
86  return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
87  &aclWeightsInfo,
88  optionalAclBiasesInfo,
89  &aclOutputInfo,
90  aclPadStrideInfo,
91  aclDepthMultiplier,
92  activationInfo,
93  aclDilationInfo);
94 
95 }
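
A sketch of the [1, H, W, C] weight layout (where C = I * M) that Arm NN uses for depthwise weights regardless of the input data layout, as the comments above describe. The shapes are illustrative; with m_BiasEnabled left at its default, an EmptyOptional is passed for the bias.

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp, armnn/Optional.hpp and ClDepthwiseConvolutionWorkload.hpp are included.
bool IsDepthwise3x3SupportedOnGpuAcc()
{
    armnn::TensorInfo input({ 1, 16, 16, 8 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 16, 16, 8 }, armnn::DataType::Float32);
    armnn::TensorInfo weights({ 1, 3, 3, 8 }, armnn::DataType::Float32); // [1, H, W, I*M], M = 1
    weights.SetConstant(true);

    armnn::DepthwiseConvolution2dDescriptor descriptor;
    descriptor.m_StrideX = 1;  descriptor.m_StrideY   = 1;
    descriptor.m_PadLeft = 1;  descriptor.m_PadRight  = 1;
    descriptor.m_PadTop  = 1;  descriptor.m_PadBottom = 1;
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    arm_compute::Status status = armnn::ClDepthwiseConvolutionWorkloadValidate(
        input, output, descriptor, weights, armnn::EmptyOptional(), nullptr /* no fused activation */);
    return status.error_code() == arm_compute::ErrorCode::OK;
}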

◆ ClDequantizeWorkloadValidate()

arm_compute::Status ClDequantizeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 22 of file ClDequantizeWorkload.cpp.

Referenced by ClLayerSupport::IsDequantizeSupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
26 
27  return arm_compute::CLDequantizationLayer::validate(&aclInputInfo, &aclOutputInfo);
28 }

◆ ClDivisionWorkloadValidate()

arm_compute::Status ClDivisionWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 18 of file ClDivisionWorkload.cpp.

Referenced by ClLayerSupport::IsDivisionSupported(), and ClBackend::OptimizeSubgraphView().

22 {
23  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
28  activationDescriptor);
29 
30  return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput, activationInfo);
31 }

◆ ClExpWorkloadValidate()

arm_compute::Status ClExpWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file ClExpWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLExpLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClFloorWorkloadValidate()

arm_compute::Status ClFloorWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 14 of file ClFloorFloatWorkload.cpp.

Referenced by ClLayerSupport::IsFloorSupported().

16 {
17  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
18  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
19 
20  return arm_compute::CLFloor::validate(&aclInput, &aclOutput);
21 }

◆ ClFullyConnectedWorkloadValidate()

arm_compute::Status ClFullyConnectedWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases,
const FullyConnectedDescriptor &  descriptor,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file ClFullyConnectedWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by ClLayerSupport::IsFullyConnectedSupported(), and ClBackend::OptimizeSubgraphView().

25 {
26  // The CL implemented workload does support both const and non const
27  // weights. However, in the case of non const weights we'd have to call
28  // prepare or configure for each inference which we're not setup to do just yet.
29  if (!weights.IsConstant())
30  {
31  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
32  "Arm NN ClFullyConnectedWorkload does not support non constant weights."};
33  }
34  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
35  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
36  arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
37  aclWeights.set_are_values_constant(weights.IsConstant());
38 
39  arm_compute::TensorInfo aclBiases;
40  arm_compute::TensorInfo* optionalAclBiases = nullptr;
41  if (descriptor.m_BiasEnabled)
42  {
43  ARMNN_ASSERT(biases.has_value());
44  // Same for bias as weights. We don't currently support non const.
45  if (!biases.value().IsConstant())
46  {
47  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
48  "Arm NN ClFullyConnectedWorkload does not support non constant bias."};
49  }
50  aclBiases = BuildArmComputeTensorInfo(biases.value());
51  aclBiases.set_are_values_constant(biases.value().IsConstant());
52  optionalAclBiases = &aclBiases;
53  }
54 
55  const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
56  ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
57  return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
58  &aclWeights,
59  optionalAclBiases,
60  &aclOutput,
61  fullyConnectedLayerInfo);
62 }

◆ ClGatherNdWorkloadValidate()

arm_compute::Status ClGatherNdWorkloadValidate ( const TensorInfo &  paramsInfo,
const TensorInfo &  indicesInfo,
const TensorInfo &  outputInfo 
)

Validates each stage of the GatherNd decomposition in turn (the Mul, ReduceSum, Gather and Reshape layers) and returns OK only if all of them are valid.

Definition at line 16 of file ClGatherNdWorkload.cpp.

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by ClLayerSupport::IsGatherNdSupported().

19 {
20  // Calculate ND, K, W, C.
21  std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
22 
23  /// Validate Mul
24  // Indices with shape { W, ND }
25  armnn::TensorInfo indices_W_ND_Info = indicesInfo;
26  indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
27  const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
28 
29  // Flattened coefficients with shape { ND }
30  armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
31  flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
32  const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
33 
34  // Output of Mul with shape { W, ND }
35  const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
36 
37  auto statusMul = arm_compute::CLPixelWiseMultiplication::validate(&aclIndicesInfo,
38  &aclFlattenedCoeffInfo,
39  &aclOutputMulInfo,
40  1.0f,
41  arm_compute::ConvertPolicy::WRAP,
42  arm_compute::RoundingPolicy::TO_ZERO,
43  arm_compute::ActivationLayerInfo());
44 
45  /// Validate ReduceSum
46  // Flattened indices with shape { W }
47  armnn::TensorInfo flattenedIndices_Info = indicesInfo;
48  flattenedIndices_Info.SetShape({ keyIndices["W"] });
49  const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
50 
51  const std::vector<unsigned int> armnnReduceAxes(1, 1);
52  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
53  indices_W_ND_Info.GetNumDimensions(),
54  armnnReduceAxes);
55 
56  auto statusReduceSum = arm_compute::CLReductionOperation::validate(&aclOutputMulInfo,
57  &aclFlattenedIndicesInfo,
58  static_cast<unsigned int>(coords[0]),
59  arm_compute::ReductionOperation::SUM,
60  false);
61 
62  /// Validate Gather
63  // Params with shape { K, C }
64  armnn::TensorInfo params_K_C_Info = paramsInfo;
65  params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
66  const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
67 
68  // Output of gather with shape { W, C }
69  armnn::TensorInfo outputGather_Info = outputInfo;
70  outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
71  const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
72 
73  auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
74  auto statusGather =
75  arm_compute::CLGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
76 
77  /// Validate Reshape
78  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
79 
80  auto statusReshape = arm_compute::CLReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
81 
82  /// Return OK if all the layers are valid
83  auto okCode = arm_compute::ErrorCode::OK;
84  if (statusMul.error_code() == okCode &&
85  statusReduceSum.error_code() == okCode &&
86  statusGather.error_code() == okCode &&
87  statusReshape.error_code() == okCode)
88  {
89  return arm_compute::Status(arm_compute::ErrorCode::OK,
90  "All GatherND layers validate status OK.");
91  }
92  else
93  {
94  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
95  "GatherND layer validate status failed.");
96  }
97 }
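
As a worked illustration of the decomposition validated above (the shapes and derived values are this example's own, not taken from the sources): for params of shape { 5, 5, 2 } and indices of shape { 3, 2 }, the last indices dimension gives ND = 2, so W = 3 index rows each select one of K = 5 * 5 = 25 flattened outer positions carrying C = 2 trailing elements, and the gathered result is reshaped to the { 3, 2 } output. A minimal call would look like:

// Assumes armnn/Tensor.hpp and ClGatherNdWorkload.hpp are included.
bool IsGatherNdSupportedOnGpuAcc()
{
    armnn::TensorInfo params({ 5, 5, 2 }, armnn::DataType::Float32);
    armnn::TensorInfo indices({ 3, 2 },   armnn::DataType::Signed32);
    armnn::TensorInfo output({ 3, 2 },    armnn::DataType::Float32);

    arm_compute::Status status = armnn::ClGatherNdWorkloadValidate(params, indices, output);
    return status.error_code() == arm_compute::ErrorCode::OK;
}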

◆ ClGatherWorkloadValidate()

arm_compute::Status ClGatherWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  indices,
const TensorInfo &  output,
const GatherDescriptor &  descriptor 
)

Definition at line 15 of file ClGatherWorkload.cpp.

Referenced by ClLayerSupport::IsGatherSupported().

19 {
20  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25 
26  return arm_compute::CLGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
27 }

◆ ClImportTensorHandleFactoryId()

constexpr const char* armnn::ClImportTensorHandleFactoryId ( )

Definition at line 15 of file ClImportTensorHandleFactory.hpp.

Referenced by ClImportTensorHandleFactory::GetIdStatic().

16 {
17  return "Arm/Cl/ImportTensorHandleFactory";
18 }

◆ ClInstanceNormalizationWorkloadValidate()

arm_compute::Status ClInstanceNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const InstanceNormalizationDescriptor &  descriptor 
)

Definition at line 18 of file ClInstanceNormalizationWorkload.cpp.

Referenced by ClLayerSupport::IsInstanceNormalizationSupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  return arm_compute::CLInstanceNormalizationLayer::validate(&aclInputInfo,
26  &aclOutputInfo,
27  descriptor.m_Gamma,
28  descriptor.m_Beta,
29  descriptor.m_Eps);
30 }

◆ ClL2NormalizationWorkloadValidate()

arm_compute::Status ClL2NormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const L2NormalizationDescriptor &  descriptor 
)

Definition at line 17 of file ClL2NormalizationFloatWorkload.cpp.

Referenced by ClLayerSupport::IsL2NormalizationSupported().

20 {
21  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
22  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
23 
24  int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
25 
26  return arm_compute::CLL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
27 }

◆ ClLogicalAndWorkloadValidate()

arm_compute::Status ClLogicalAndWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 20 of file ClLogicalAndWorkload.cpp.

Referenced by ClLayerSupport::IsLogicalBinarySupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  const arm_compute::Status aclStatus = arm_compute::CLLogicalAnd::validate(&aclInputInfo0,
29  &aclInputInfo1,
30  &aclOutputInfo);
31  return aclStatus;
32 }

◆ ClLogicalNotWorkloadValidate()

arm_compute::Status ClLogicalNotWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 20 of file ClLogicalNotWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::CLLogicalNot::validate(&aclInputInfo,
27  &aclOutputInfo);
28  return aclStatus;
29 }

◆ ClLogicalOrWorkloadValidate()

arm_compute::Status ClLogicalOrWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 20 of file ClLogicalOrWorkload.cpp.

Referenced by ClLayerSupport::IsLogicalBinarySupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  const arm_compute::Status aclStatus = arm_compute::CLLogicalOr::validate(&aclInputInfo0,
29  &aclInputInfo1,
30  &aclOutputInfo);
31  return aclStatus;
32 }

◆ ClLogSoftmaxWorkloadValidate()

arm_compute::Status ClLogSoftmaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const LogSoftmaxDescriptor &  descriptor 
)

Definition at line 17 of file ClLogSoftmaxWorkload.cpp.

Referenced by ClLayerSupport::IsLogSoftmaxSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25  return arm_compute::CLLogSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
26 }

◆ ClLogWorkloadValidate()

arm_compute::Status ClLogWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file ClLogWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLLogLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClLstmFloatWorkloadValidate()

arm_compute::Status ClLstmFloatWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  scratchBuffer,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const LstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 244 of file ClLstmFloatWorkload.cpp.

Referenced by ClLayerSupport::IsLstmSupported().

249 {
250  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
251 
252  // The inputs and the outputs
253  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
254  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
255  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
256  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
257  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
258  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
259  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
260 
261  // Basic parameters
262  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
263  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
264  const arm_compute::TensorInfo aclInputToCellWeightsInfo
265  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
266  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
267  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
268  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
270  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
272  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
274  const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
275  const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
276  const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
277 
278  arm_compute::TensorInfo aclInputToInputWeightsInfo;
279  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
280  arm_compute::TensorInfo aclCellToInputWeightsInfo;
281  arm_compute::TensorInfo aclInputGateBiasInfo;
282  arm_compute::TensorInfo aclProjectionWeightsInfo;
283  arm_compute::TensorInfo aclProjectionBiasInfo;
284  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
285  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
286  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
287  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
288  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
289  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
290 
291  if (!descriptor.m_CifgEnabled)
292  {
293  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
294  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
295 
296  if (paramsInfo.m_CellToInputWeights != nullptr)
297  {
298  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
299  }
300  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
301  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
302  paramsInfo.m_CellToInputWeights != nullptr ?
303  &aclCellToInputWeightsInfo: nullptr,
304  &aclInputGateBiasInfo);
305  }
306 
307  if (descriptor.m_ProjectionEnabled)
308  {
309  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
310 
311  if (paramsInfo.m_ProjectionBias != nullptr)
312  {
313  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
314  }
315  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
316  paramsInfo.m_ProjectionBias != nullptr ?
317  &aclProjectionBiasInfo: nullptr);
318  }
319 
320  if (descriptor.m_PeepholeEnabled)
321  {
322  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
323  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
324  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
325  }
326 
327  float cell_threshold = descriptor.m_ClippingThresCell;
328  float projection_threshold = descriptor.m_ClippingThresProj;
329 
330  // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations
331  arm_compute::ActivationLayerInfo activationLayerInfo =
332  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
333 
334  if (descriptor.m_LayerNormEnabled)
335  {
336  if (!descriptor.m_CifgEnabled)
337  {
338  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
339  }
340 
341  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
342 
343  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
344 
345  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
346 
347  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
348  nullptr : &aclInputLayerNormWeightsInfo,
349  &aclForgetLayerNormWeightsInfo,
350  &aclCellLayerNormWeightsInfo,
351  &aclOutputLayerNormWeightsInfo);
352  }
353 
354  return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo,
355  &aclInputToCellWeightsInfo,
356  &aclInputToOutputWeightsInfo,
357  &aclRecurrentToForgetWeightsInfo,
358  &aclRecurrentToCellWeightsInfo,
359  &aclRecurrentToOutputWeightsInfo,
360  &aclForgetGateBiasInfo,
361  &aclCellBiasInfo,
362  &aclOutputGateBiasInfo,
363  &aclOutputStateInInfo, &aclCellStateInInfo,
364  &aclScratchBufferInfo, &aclOutputStateOutInfo,
365  &aclCellStateOutInfo, &aclOutputInfo,
366  lstm_params_info, activationLayerInfo,
367  cell_threshold, projection_threshold);
368 }

◆ ClMaximumWorkloadValidate()

arm_compute::Status ClMaximumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 24 of file ClMaximumWorkload.cpp.

Referenced by ClLayerSupport::IsMaximumSupported().

27 {
28  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
29  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  const arm_compute::Status aclStatus = arm_compute::CLElementwiseMax::validate(&aclInput0Info,
33  &aclInput1Info,
34  &aclOutputInfo);
35 
36  return aclStatus;
37 }

◆ ClMeanValidate()

arm_compute::Status ClMeanValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const MeanDescriptor &  descriptor 
)

Definition at line 17 of file ClMeanWorkload.cpp.

Referenced by ClLayerSupport::IsMeanSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
25  input.GetNumDimensions(),
26  descriptor.m_Axis);
27 
28  return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
29 }

◆ ClMinimumWorkloadValidate()

arm_compute::Status ClMinimumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 24 of file ClMinimumWorkload.cpp.

Referenced by ClLayerSupport::IsMinimumSupported().

27 {
28  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
29  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  const arm_compute::Status aclStatus = arm_compute::CLElementwiseMin::validate(&aclInput0Info,
33  &aclInput1Info,
34  &aclOutputInfo);
35 
36  return aclStatus;
37 }

◆ ClMultiplicationWorkloadValidate()

arm_compute::Status ClMultiplicationWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 18 of file ClMultiplicationWorkload.cpp.

Referenced by ClLayerSupport::IsMultiplicationSupported(), and ClBackend::OptimizeSubgraphView().

22 {
23  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
28  arm_compute::ConvertPolicy::SATURATE :
29  arm_compute::ConvertPolicy::WRAP;
30 
31  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
32  activationDescriptor);
33 
34  // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
35  // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
36  // ignored for F32 tensors.
37  return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1,
38  &aclInput2,
39  &aclOutput,
40  1.0f,
41  convertPolicy,
42  arm_compute::RoundingPolicy::TO_ZERO,
43  activationInfo);
44 }
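
A sketch of the convert-policy selection described in the code above (illustrative): float inputs validate with ConvertPolicy::WRAP, while quantized inputs such as QAsymmU8 would select SATURATE.

// Assumes armnn/Tensor.hpp and ClMultiplicationWorkload.hpp are included.
bool IsElementwiseMulSupportedOnGpuAcc()
{
    armnn::TensorInfo input0({ 2, 4 }, armnn::DataType::Float32);
    armnn::TensorInfo input1({ 2, 4 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 2, 4 }, armnn::DataType::Float32);

    arm_compute::Status status =
        armnn::ClMultiplicationWorkloadValidate(input0, input1, output, nullptr /* no fused activation */);
    return status.error_code() == arm_compute::ErrorCode::OK;
}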

◆ ClNegWorkloadValidate()

arm_compute::Status ClNegWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file ClNegWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLNegLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClNormalizationWorkloadValidate()

arm_compute::Status ClNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const NormalizationDescriptor &  descriptor 
)

Definition at line 19 of file ClNormalizationFloatWorkload.cpp.

Referenced by ClLayerSupport::IsNormalizationSupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
27 
28  return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
29 }

◆ ClPadValidate()

arm_compute::Status ClPadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PadDescriptor &  descriptor 
)

Definition at line 62 of file ClPadWorkload.cpp.

Referenced by ClLayerSupport::IsPadSupported().

65 {
66  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
67  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
68 
69  std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
70 
71  std::reverse_copy(std::begin(descriptor.m_PadList),
72  std::end(descriptor.m_PadList),
73  std::begin(reversed_PadList));
74 
75  arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
76 
77  // PixelValue is currently unused when validating, but it's required to pass in PaddingMode.
78  arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
79  const arm_compute::Status aclStatus =
80  arm_compute::CLPadLayer::validate(&aclInputInfo,
81  &aclOutputInfo,
82  padList,
83  pixelValue,
84  ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
85 
86  return aclStatus;
87 }
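
A sketch of the PadList ordering handled above (illustrative): Arm NN stores one (before, after) pair per dimension with the outermost dimension first, and the reverse_copy turns that into the innermost-first ordering CLPadLayer expects.

// Assumes armnn/Tensor.hpp, armnn/Descriptors.hpp and ClPadWorkload.hpp are included.
bool IsPadSupportedOnGpuAcc()
{
    // Pad one element on each side of H and W of an NHWC tensor.
    armnn::TensorInfo input({ 1, 3, 3, 1 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 1, 5, 5, 1 }, armnn::DataType::Float32);

    armnn::PadDescriptor descriptor;
    descriptor.m_PadList  = { { 0, 0 }, { 1, 1 }, { 1, 1 }, { 0, 0 } }; // (before, after) per dimension
    descriptor.m_PadValue = 0.0f;

    arm_compute::Status status = armnn::ClPadValidate(input, output, descriptor);
    return status.error_code() == arm_compute::ErrorCode::OK;
}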

◆ ClPermuteWorkloadValidate()

arm_compute::Status ClPermuteWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PermuteDescriptor &  descriptor 
)

Definition at line 17 of file ClPermuteWorkload.cpp.

Referenced by ClLayerSupport::IsPermuteSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
24 
25  return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
26  armcomputetensorutils::BuildArmComputePermutationVector(mappings));
27 }

◆ ClPooling2dWorkloadValidate()

arm_compute::Status ClPooling2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 18 of file ClPooling2dWorkload.cpp.

Referenced by ClLayerSupport::IsPooling2dSupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
26 
27  return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
28 }

◆ ClPooling3dWorkloadValidate()

arm_compute::Status ClPooling3dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling3dDescriptor &  descriptor 
)

Definition at line 18 of file ClPooling3dWorkload.cpp.

Referenced by ClLayerSupport::IsPooling3dSupported().

21  {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
24 
25  arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
26 
27  return arm_compute::CLPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
28  }

◆ ClPreluWorkloadValidate()

arm_compute::Status ClPreluWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  alpha,
const TensorInfo &  output 
)

Definition at line 16 of file ClPreluWorkload.cpp.

Referenced by ClLayerSupport::IsPreluSupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::CLPReluLayer::validate(&aclInput,
25  &aclAlpha,
26  &aclOutput);
27 }

◆ ClQLstmWorkloadValidate()

arm_compute::Status ClQLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateOut,
const TensorInfo &  outputStateOut,
const TensorInfo &  output,
const QLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 247 of file ClQLstmWorkload.cpp.

Referenced by ClLayerSupport::IsQLstmSupported().

255 {
256  arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
257 
258  // Input/Output tensor info
259  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
260  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
261  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
262 
263  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
264  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
265  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
266 
267  // Mandatory tensor info
268  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
270  const arm_compute::TensorInfo aclInputToCellWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
272  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
274  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
275  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
276  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
277  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
278  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
279  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
280  const arm_compute::TensorInfo aclForgetGateBiasInfo
281  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
282  const arm_compute::TensorInfo aclCellBiasInfo
283  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
284  const arm_compute::TensorInfo aclOutputGateBiasInfo
285  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
286 
287  // Optional tensor info
288  arm_compute::TensorInfo aclInputToInputWeightsInfo;
289  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
290 
291  arm_compute::TensorInfo aclCellToInputWeightsInfo;
292  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
293  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
294 
295  arm_compute::TensorInfo aclInputGateBiasInfo;
296 
297  arm_compute::TensorInfo aclProjectionWeightsInfo;
298  arm_compute::TensorInfo aclProjectionBiasInfo;
299 
300  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
301  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
302  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
303  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
304 
305  // Create tensor info for optional params if they are enabled
306  if (descriptor.m_PeepholeEnabled)
307  {
308  if (!descriptor.m_CifgEnabled)
309  {
310  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
311  }
312 
313  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
314  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
315 
316  // Set peephole params info
317  aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
318  &aclCellToOutputWeightsInfo);
319  }
320 
321  if (descriptor.m_ProjectionEnabled)
322  {
323  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
324 
325  if (paramsInfo.m_ProjectionBias != nullptr)
326  {
327  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
328  }
329 
330  // Set projection params info
331  aclParamsInfo.set_projection_params(
332  &aclProjectionWeightsInfo,
333  paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
334  }
335 
336  if (descriptor.m_LayerNormEnabled)
337  {
338  if (!descriptor.m_CifgEnabled)
339  {
340  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
341  }
342 
343  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
344  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
345  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
346 
347  // Set layer norm params info
348  aclParamsInfo.set_layer_normalization_params(
349  paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
350  &aclForgetLayerNormWeightsInfo,
351  &aclCellLayerNormWeightsInfo,
352  &aclOutputLayerNormWeightsInfo);
353  }
354 
355  if (!descriptor.m_CifgEnabled)
356  {
357  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
358  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
359  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
360 
361  // Set CIFG params info
362  aclParamsInfo.set_cifg_params(
363  &aclInputToInputWeightsInfo,
364  &aclRecurrentToInputWeightsInfo,
365  paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
366  &aclInputGateBiasInfo);
367  }
368 
369  // Set scalar descriptor params
370  aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
371  aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
372  aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
373  aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
374  descriptor.m_ForgetIntermediateScale,
375  descriptor.m_CellIntermediateScale,
376  descriptor.m_OutputIntermediateScale);
377 
378  // QLSTM CL validate
379  return arm_compute::CLQLSTMLayer::validate(&aclInputInfo,
380  &aclInputToForgetWeightsInfo,
381  &aclInputToCellWeightsInfo,
382  &aclInputToOutputWeightsInfo,
383  &aclRecurrentToForgetWeightsInfo,
384  &aclRecurrentToCellWeightsInfo,
385  &aclRecurrentToOutputWeightsInfo,
386  &aclForgetGateBiasInfo,
387  &aclCellBiasInfo,
388  &aclOutputGateBiasInfo,
389  &aclCellStateInInfo,
390  &aclOutputStateInInfo,
391  &aclCellStateOutInfo,
392  &aclOutputStateOutInfo,
393  &aclOutputInfo,
394  aclParamsInfo);
395 }

◆ ClQuantizedLstmWorkloadValidate()

arm_compute::Status ClQuantizedLstmWorkloadValidate ( const TensorInfo input,
const TensorInfo previousCellStateIn,
const TensorInfo previousOutputIn,
const TensorInfo cellStateOut,
const TensorInfo output,
const QuantizedLstmInputParamsInfo paramsInfo 
)

Definition at line 18 of file ClQuantizedLstmWorkload.cpp.

Referenced by ClLayerSupport::IsQuantizedLstmSupported().

22 {
23  // Inputs
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclPreviousCellStateInInfo = BuildArmComputeTensorInfo(previousCellStateIn);
26  const arm_compute::TensorInfo aclPreviousOutputInInfo = BuildArmComputeTensorInfo(previousOutputIn);
27 
28  // Outputs
29  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
30  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
31 
32  // Basic parameters
33  const arm_compute::TensorInfo aclInputToInputWeightsInfo
34  = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
35  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
36  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
37  const arm_compute::TensorInfo aclInputToCellWeightsInfo
38  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
39  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
40  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
41  const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
42  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
43  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
44  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
45  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
46  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
47  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
48  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
49  const arm_compute::TensorInfo aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
50  const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
51  const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
52  const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
53 
54  return arm_compute::CLLSTMLayerQuantized::validate(&aclInputInfo, &aclInputToInputWeightsInfo,
55  &aclInputToForgetWeightsInfo, &aclInputToCellWeightsInfo,
56  &aclInputToOutputWeightsInfo, &aclRecurrentToInputWeightsInfo,
57  &aclRecurrentToForgetWeightsInfo, &aclRecurrentToCellWeightsInfo,
58  &aclRecurrentToOutputWeightsInfo, &aclInputGateBiasInfo,
59  &aclForgetGateBiasInfo, &aclCellBiasInfo, &aclOutputGateBiasInfo,
60  &aclPreviousCellStateInInfo, &aclPreviousOutputInInfo,
61  &aclCellStateOutInfo, &aclOutputInfo);
62 }

◆ ClQuantizeWorkloadValidate()

arm_compute::Status ClQuantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 22 of file ClQuantizeWorkload.cpp.

Referenced by ClLayerSupport::IsQuantizeSupported().

24 {
25  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
27 
28  return arm_compute::CLQuantizationLayer::validate(&aclInputInfo,
29  &aclOutputInfo);
30 }

◆ ClReduceWorkloadValidate()

arm_compute::Status ClReduceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ReduceDescriptor descriptor 
)

Definition at line 18 of file ClReduceWorkload.cpp.

References ReduceDescriptor::m_vAxis.

Referenced by ClLayerSupport::IsReduceSupported().

21 {
22  if (descriptor.m_vAxis.size() == 1 || descriptor.m_vAxis.empty())
23  {
24  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
25  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
28  input.GetNumDimensions(),
29  descriptor.m_vAxis);
30 
31  return arm_compute::CLReductionOperation::validate(&aclInputInfo,
32  &aclOutputInfo,
33  static_cast<unsigned int>(coords[0]),
34  ConvertReductionOperationToAcl(descriptor),
35  descriptor.m_KeepDims);
36  }
37  else
38  {
39  // Validate layer if there are multiple axes.
40  arm_compute::Status status;
41  IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, descriptor, status);
42  return status;
43  }
44 }
#define IS_MULTI_AXES_REDUCE_SUPPORTED(func, input, desc, status)
Macro function to check whether a layer with multiple axes is supported on each backend.
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
arm_compute::ReductionOperation ConvertReductionOperationToAcl(const ReduceDescriptor &descriptor)
Status
enumeration
Definition: Types.hpp:42
arm_compute::Status ClReduceWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const ReduceDescriptor &descriptor)
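A minimal usage sketch follows; it is not taken from the ArmNN sources, and the tensor shapes and reduce operation are illustrative assumptions only. It shows how a caller such as a layer-support check might invoke this validate function for a single-axis mean reduction:

// Illustrative only: reduce a [1,3,4,4] Float32 tensor over its last axis, keeping dims.
armnn::TensorInfo inputInfo({1, 3, 4, 4}, armnn::DataType::Float32);
armnn::TensorInfo outputInfo({1, 3, 4, 1}, armnn::DataType::Float32);

armnn::ReduceDescriptor reduceDesc;
reduceDesc.m_vAxis = {3};
reduceDesc.m_KeepDims = true;
reduceDesc.m_ReduceOperation = armnn::ReduceOperation::Mean;

arm_compute::Status status = armnn::ClReduceWorkloadValidate(inputInfo, outputInfo, reduceDesc);
if (status.error_code() != arm_compute::ErrorCode::OK)
{
    // Configuration is not supported by the GPU backend; fall back or report the error.
}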

◆ ClReshapeWorkloadValidate()

arm_compute::Status ClReshapeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 15 of file ClReshapeWorkload.cpp.

Referenced by ClLayerSupport::IsReshapeSupported().

17 {
18  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
20 
21  return arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
22 }

◆ ClResizeWorkloadValidate()

arm_compute::Status ClResizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ResizeDescriptor descriptor 
)

Definition at line 22 of file ClResizeWorkload.cpp.

Referenced by ClLayerSupport::IsResizeSupported().

25 {
26  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
27  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
28 
29  arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
30  aclInputInfo.set_data_layout(aclDataLayout);
31  aclOutputInfo.set_data_layout(aclDataLayout);
32 
33  arm_compute::InterpolationPolicy aclInterpolationPolicy =
34  ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
35 
36  arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
37  arm_compute::SamplingPolicy::TOP_LEFT;
38 
39  return arm_compute::CLScale::validate(&aclInputInfo,
40  &aclOutputInfo,
41  arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
42  arm_compute::BorderMode::REPLICATE,
43  arm_compute::PixelValue(0.f),
44  samplingPolicy,
45  true,
46  descriptor.m_AlignCorners));
47 }
arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy(ResizeMethod resizeMethod)
DataLayout
Definition: Types.hpp:62

◆ ClRsqrtWorkloadValidate()

arm_compute::Status ClRsqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClRsqrtWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLRsqrtLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClSinWorkloadValidate()

arm_compute::Status ClSinWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 18 of file ClSinWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::CLSinLayer::validate(&aclInput, &aclOutput);
24 }

◆ ClSliceWorkloadValidate()

arm_compute::Status ClSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SliceDescriptor descriptor 
)

Definition at line 18 of file ClSliceWorkload.cpp.

Referenced by ClLayerSupport::IsSliceSupported().

21 {
22  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  arm_compute::Coordinates starts;
26  arm_compute::Coordinates ends;
27 
28  std::tie(starts, ends) = SetClSliceData(descriptor.m_Begin, descriptor.m_Size);
29 
30  return arm_compute::CLSlice::validate(&aclInput, &aclOutput, starts, ends);
31 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
auto SetClSliceData(const std::vector< unsigned int > &m_begin, const std::vector< unsigned int > &m_size)

◆ ClSoftmaxWorkloadValidate()

arm_compute::Status ClSoftmaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SoftmaxDescriptor descriptor 
)

Definition at line 17 of file ClSoftmaxWorkload.cpp.

Referenced by ClLayerSupport::IsSoftmaxSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
25  return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
26 }
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank...

◆ ClSpaceToBatchNdWorkloadValidate()

arm_compute::Status ClSpaceToBatchNdWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToBatchNdDescriptor descriptor 
)

Definition at line 23 of file ClSpaceToBatchNdWorkload.cpp.

Referenced by ClLayerSupport::IsSpaceToBatchNdSupported().

26 {
27  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
28  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
29 
30  // ArmNN blockShape is [H, W] Cl asks for W, H
31  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
32  int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
33 
34  arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(
35  descriptor.m_PadList[1].first, descriptor.m_PadList[0].first);
36  arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(
37  descriptor.m_PadList[1].second, descriptor.m_PadList[0].second);
38 
39  return arm_compute::CLSpaceToBatchLayer::validate(&aclInputInfo,
40  blockWidth,
41  blockHeight,
42  paddingLeftTop,
43  paddingRightBottom,
44  &aclOutputInfo);
45 }
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
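As the comment in the listing notes, ArmNN stores the block shape as [H, W] while CL expects width first. The following sketch (descriptor values are illustrative assumptions, not from the ArmNN sources) traces how the descriptor fields map onto the CL call:

// Illustrative only: ArmNN descriptor with blockShape [H, W] = {2, 3} and per-dimension padding.
armnn::SpaceToBatchNdDescriptor desc;
desc.m_BlockShape = {2, 3};                  // H = 2, W = 3
desc.m_PadList    = {{1, 1}, {0, 2}};        // [H pads, W pads]
desc.m_DataLayout = armnn::DataLayout::NHWC;

// Inside the validate function this becomes:
//   blockHeight = 2, blockWidth = 3
//   paddingLeftTop     = Size2D(W begin = 0, H begin = 1)
//   paddingRightBottom = Size2D(W end   = 2, H end   = 1)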

◆ ClSpaceToDepthWorkloadValidate()

arm_compute::Status ClSpaceToDepthWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const SpaceToDepthDescriptor descriptor 
)

Definition at line 54 of file ClSpaceToDepthWorkload.cpp.

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by ClLayerSupport::IsSpaceToDepthSupported().

57 {
58  DataLayout dataLayout = descriptor.m_DataLayout;
59  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
60 
61  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
62 
63  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
64 
65  const arm_compute::Status aclStatus = arm_compute::CLSpaceToDepthLayer::validate(&aclInputInfo,
66  &aclOutputInfo,
67  blockSize);
68  return aclStatus;
69 }
DataLayout
Definition: Types.hpp:62
Status
enumeration
Definition: Types.hpp:42
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35

◆ ClSplitterWorkloadValidate()

arm_compute::Status ClSplitterWorkloadValidate ( const TensorInfo input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
unsigned int  splitAxis 
)

Definition at line 31 of file ClSplitterWorkload.cpp.

Referenced by ClLayerSupport::IsSplitterSupported().

34 {
35  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
36 
37  size_t numOutputs = outputs.size();
38 
39  std::vector<arm_compute::TensorInfo> aclOutputs;
40  aclOutputs.reserve(numOutputs);
41 
42  std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
43  aclOutputPtr.reserve(numOutputs);
44 
45  for (size_t i = 0u; i < outputs.size(); ++i)
46  {
47  aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
48  aclOutputPtr.emplace_back(&aclOutputs.back());
49  }
50 
51  unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
52  return arm_compute::CLSplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
53 }

◆ ClSqrtWorkloadValidate()

arm_compute::Status ClSqrtWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file ClSqrtWorkload.cpp.

Referenced by ClLayerSupport::IsElementwiseUnarySupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  ActivationDescriptor descriptor;
25  descriptor.m_Function = ActivationFunction::Sqrt;
26  const arm_compute::ActivationLayerInfo activationLayerInfo =
27  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
28 
29  return arm_compute::CLActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
30 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClStackWorkloadValidate()

arm_compute::Status ClStackWorkloadValidate ( const std::vector< const TensorInfo *> &  inputs,
const TensorInfo output,
const StackDescriptor descriptor 
)

Definition at line 29 of file ClStackWorkload.cpp.

Referenced by ClLayerSupport::IsStackSupported().

32 {
33  std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
34  arm_compute::TensorInfo aclInputInfo;
35  for (const TensorInfo* input : inputs)
36  {
37  aclInputInfo = BuildArmComputeTensorInfo(*input);
38  aclInputPtrs.emplace_back(&aclInputInfo);
39  }
40  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
41 
42  int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
43 
44  return arm_compute::CLStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
45 }

◆ ClStridedSliceWorkloadValidate()

arm_compute::Status ClStridedSliceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const StridedSliceDescriptor descriptor 
)

Definition at line 27 of file ClStridedSliceWorkload.cpp.

Referenced by ClLayerSupport::IsStridedSliceSupported().

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33 
34  arm_compute::Coordinates starts;
35  arm_compute::Coordinates ends;
36  arm_compute::Coordinates strides;
37 
38  std::tie(starts, ends, strides) = SetClStridedSliceData(descriptor.m_Begin, descriptor.m_End, descriptor.m_Stride);
39 
40  auto numDimensions = armnn::numeric_cast<int>(input.GetNumDimensions());
41  int32_t begin_mask = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
42  int32_t end_mask = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
43  int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
44 
45  return arm_compute::CLStridedSlice::validate(&aclInputInfo,
46  &aclOutputInfo,
47  starts,
48  ends,
49  strides,
50  begin_mask,
51  end_mask,
52  shrink_axis_mask);
53 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
auto SetClStridedSliceData(const std::vector< int > &m_begin, const std::vector< int > &m_end, const std::vector< int > &m_stride)

◆ ClSubtractionValidate()

arm_compute::Status ClSubtractionValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 46 of file ClSubtractionWorkload.cpp.

Referenced by ClLayerSupport::IsSubtractionSupported(), and ClBackend::OptimizeSubgraphView().

50 {
51  const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
52  const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
53  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
54 
55  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
56  activationDescriptor);
57 
58  const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info,
59  &aclInput1Info,
60  &aclOutputInfo,
61  g_AclConvertPolicy,
62  activationInfo);
63 
64  return aclStatus;
65 }
Status
enumeration
Definition: Types.hpp:42
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ ClTensorHandleFactoryId()

constexpr const char* armnn::ClTensorHandleFactoryId ( )

Definition at line 15 of file ClTensorHandleFactory.hpp.

Referenced by ClTensorHandleFactory::GetIdStatic().

16 {
17  return "Arm/Cl/TensorHandleFactory";
18 }

◆ ClTransposeConvolution2dWorkloadValidate()

arm_compute::Status ClTransposeConvolution2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 26 of file ClTransposeConvolution2dWorkload.cpp.

Referenced by ClLayerSupport::IsTransposeConvolution2dSupported().

31 {
32  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
35 
36  arm_compute::TensorInfo aclBiasesInfo;
37  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
38 
39  if (descriptor.m_BiasEnabled)
40  {
41  ARMNN_ASSERT(biases.has_value());
42 
43  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
44  optionalAclBiasesInfo = &aclBiasesInfo;
45  }
46 
47  arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(descriptor);
48 
49  return arm_compute::CLDeconvolutionLayer::validate(&aclInputInfo,
50  &aclWeightsInfo,
51  optionalAclBiasesInfo,
52  &aclOutputInfo,
53  padStrideInfo);
54 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ ClTransposeWorkloadValidate()

arm_compute::Status ClTransposeWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TransposeDescriptor descriptor 
)

Definition at line 17 of file ClTransposeWorkload.cpp.

Referenced by ClLayerSupport::IsTransposeSupported().

20 {
21  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
24 
25  return arm_compute::CLPermute::validate(&aclInputInfo, &aclOutputInfo,
26  armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
27 }

◆ ClUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status ClUnidirectionalSequenceLstmFloatWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo output,
const Optional< TensorInfo > &  hiddenStateOutput,
const Optional< TensorInfo > &  cellStateOutput,
const UnidirectionalSequenceLstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 508 of file ClUnidirectionalSequenceLstmFloatWorkload.cpp.

References TensorInfo::GetShape(), IgnoreUnused(), and LstmDescriptor::m_TimeMajor.

Referenced by ClLayerSupport::IsUnidirectionalSequenceLstmSupported().

516 {
517  IgnoreUnused(hiddenStateOutput, cellStateOutput);
518 
519  TensorShape inputLayerShape = input.GetShape();
520  TensorShape outputLayerShape = outputStateIn.GetShape();
521 
522  unsigned int maxTime = descriptor.m_TimeMajor?inputLayerShape[0]:inputLayerShape[1];
523  unsigned int batchSize = descriptor.m_TimeMajor?inputLayerShape[1]:inputLayerShape[0];
524  unsigned int inputSize = inputLayerShape[2];
525  unsigned int outputSize = outputLayerShape[2];
526 
527  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
528  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
529 
530  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
531  "Permute1 status");
532  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
533  "Split status");
534  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
535  "LSTM status");
536  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
537  "Concat status");
538  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
539  "Permute2 status");
540 
541  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
542  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
543 
544  //
545  // Permute validate
546  //
547  TensorInfo permuteOutInfo = TensorInfo(input);
548  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
549  if (!descriptor.m_TimeMajor)
550  {
551  statusPermute1 = arm_compute::CLPermute::validate(&aclInputInfo,
552  &aclPermuteOutInfo,
553  arm_compute::PermutationVector(0U, 2U, 1U));
554  }
555 
556  //
557  // Split and Concat Tensors validate
558  //
559  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
560  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
561  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
562  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
563  splitterOutputsTensorInfos.reserve(maxTime);
564  concatInputsTensorInfos.reserve(maxTime);
565  for (unsigned int i = 0; i < maxTime; ++i)
566  {
567  arm_compute::TensorInfo splitter_out;
568  arm_compute::TensorInfo concat_in;
569 
570  auto splitterTensorInfo = TensorInfo(input);
571  auto concatTensorInfo = TensorInfo(output);
572  splitterTensorInfo.SetShape({batchSize, inputSize});
573  concatTensorInfo.SetShape({batchSize, outputSize});
574 
575  arm_compute::TensorInfo aclSplitterTensorInfo
576  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
577  arm_compute::TensorInfo aclConcatTensorInfo
578  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
579 
580  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
581  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
582  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
583  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
584  }
585 
586  //
587  // Split validate
588  //
589  unsigned int numberDimensions = 3;
590  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
591  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
592 
593  if (maxTime != 1) // ACL split does not work with only one element to split.
594  {
595  if (!descriptor.m_TimeMajor)
596  {
597  statusSplit = arm_compute::CLSplit::validate(&aclPermuteOutInfo,
598  splitterOutputsTensorInfosPtr,
599  aclAxisSplit);
600  }
601  else
602  {
603  statusSplit = arm_compute::CLSplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
604  }
605  }
606 
607  //
608  // LSTM validate
609  //
610 
611  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
612 
613  const TensorInfo& scratchBuffer = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
614  const TensorInfo& outputStateOut = TensorInfo(outputStateIn.GetShape(), input.GetDataType());
615  const TensorInfo& cellStateOut = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
616 
617  // The inputs and outputs
618  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
619  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
620  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
621  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
622  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
623 
624  // Basic parameters
625  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
626  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
627  const arm_compute::TensorInfo aclInputToCellWeightsInfo
628  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
629  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
630  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
631  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
632  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
633  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
634  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
635  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
636  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
637  const arm_compute::TensorInfo aclForgetGateBiasInfo
638  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
639  const arm_compute::TensorInfo aclCellBiasInfo
640  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
641  const arm_compute::TensorInfo aclOutputGateBiasInfo
642  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
643 
644  arm_compute::TensorInfo aclInputToInputWeightsInfo;
645  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
646  arm_compute::TensorInfo aclCellToInputWeightsInfo;
647  arm_compute::TensorInfo aclInputGateBiasInfo;
648  arm_compute::TensorInfo aclProjectionWeightsInfo;
649  arm_compute::TensorInfo aclProjectionBiasInfo;
650  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
651  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
652 
653  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
654  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
655  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
656  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
657 
658 
659  if (!descriptor.m_CifgEnabled)
660  {
661  if (descriptor.m_PeepholeEnabled)
662  {
663  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
664  }
665  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
666  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
667  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
668 
669  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
670  &aclRecurrentToInputWeightsInfo,
671  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
672  &aclInputGateBiasInfo);
673  }
674 
675  if (descriptor.m_ProjectionEnabled)
676  {
677  if (paramsInfo.m_ProjectionBias != nullptr)
678  {
679  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
680  }
681  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
682 
683  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
684  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
685  }
686 
687  if (descriptor.m_PeepholeEnabled)
688  {
689  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
690  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
691 
692  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
693  }
694 
695  if (descriptor.m_LayerNormEnabled)
696  {
697  if (!descriptor.m_CifgEnabled)
698  {
699  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
700  }
701  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
702  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
703  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
704 
705  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
706  &aclInputLayerNormWeightsInfo,
707  &aclForgetLayerNormWeightsInfo,
708  &aclCellLayerNormWeightsInfo,
709  &aclOutputLayerNormWeightsInfo);
710  }
711 
712  // Needs to be set to a negative threshold to be compatible with ACL
713  float cell_threshold = descriptor.m_ClippingThresCell;
714  float projection_threshold = descriptor.m_ClippingThresProj;
715 
716  arm_compute::ActivationLayerInfo activationLayerInfo =
717  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
718 
719  for (unsigned int i = 0; i != maxTime; ++i)
720  {
721 
722  // Set LSTM input and output ITensors depending on:
723  // input format (timeMajor) & number of LSTM batches (maxTime).
724  arm_compute::ITensorInfo* outputLSTM;
725  arm_compute::ITensorInfo* inputLSTM;
726  // If there is only one LSTM time major batch, we will not concat OR permute.
727  // Set input of LSTM to be first input ITensor.
728  // Set output of LSTM to be final output ITensor.
729  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
730  if (maxTime == 1 && descriptor.m_TimeMajor)
731  {
732  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
733  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
734  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
735  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
736  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
737  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
738  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
739  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
740  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
741  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
742  }
743  // If there is only one LSTM batch major batch, we will not concat, only permute.
744  // Set input of LSTM to be output of initial permute.
745  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
746  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
747  else if (maxTime == 1 && !descriptor.m_TimeMajor)
748  {
749  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
750  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
751  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
752  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
753  inputLSTM = &aclPermuteOutInfo;
754  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
755  }
756  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
757  else
758  {
759  inputLSTM = splitterOutputsTensorInfosPtr[i];
760  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
761  }
762 
763  statusLSTM = arm_compute::CLLSTMLayer::validate(inputLSTM,
764  &aclInputToForgetWeightsInfo,
765  &aclInputToCellWeightsInfo,
766  &aclInputToOutputWeightsInfo,
767  &aclRecurrentToForgetWeightsInfo,
768  &aclRecurrentToCellWeightsInfo,
769  &aclRecurrentToOutputWeightsInfo,
770  &aclForgetGateBiasInfo,
771  &aclCellBiasInfo,
772  &aclOutputGateBiasInfo,
773  &aclOutputStateInInfo,
774  &aclCellStateInInfo,
775  &aclScratchBufferInfo,
776  &aclOutputStateOutInfo,
777  &aclCellStateOutInfo,
778  outputLSTM,
779  lstm_params_info,
780  activationLayerInfo,
781  cell_threshold,
782  projection_threshold);
783 
784  if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
785  {
786  break;
787  }
788  }
789 
790  //
791  // Concat validate
792  //
793 
794  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
795  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
796  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
797  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
798 
799  TensorInfo concatOuputTensorInfo = TensorInfo(output);
800  concatOuputTensorInfo.SetShape(timeMajorShapeOutput);
801  arm_compute::TensorInfo aclConcatOuputTensorInfo= BuildArmComputeTensorInfo(concatOuputTensorInfo);
802 
803  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
804  {
805  for (unsigned int i = 0; i < maxTime; ++i)
806  {
807  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
808  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
809  }
810 
811  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
812  if (!descriptor.m_TimeMajor)
813  {
814  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
815  &aclConcatOuputTensorInfo,
816  aclAxisConcat);
817  }
818  else
819  {
820  statusConcat = arm_compute::CLConcatenateLayer::validate(concatInputsTensorInfosPtr,
821  &aclOutputInfo,
822  aclAxisConcat);
823  }
824  }
825  // If only one LSTM batch, we do not concat and/or permute.
826  // Must ensure final output info is expanded to correct batch major dimensions.
827  else
828  {
829  if (!descriptor.m_TimeMajor)
830  {
831  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
832  BuildArmComputeTensorShape(shapeExpandBatchMajor));
833  }
834  else
835  {
836  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
837  BuildArmComputeTensorShape(shapeExpandTimeMajor));
838  }
839  }
840  //
841  // Permute validate
842  //
843  if (!descriptor.m_TimeMajor)
844  {
845  // Output now time major. Permute output back to batch major.
846  if (maxTime != 1)
847  {
848  statusPermute2 = arm_compute::CLPermute::validate(&aclConcatOuputTensorInfo,
849  &aclOutputInfo,
850  arm_compute::PermutationVector(0U, 2U, 1U));
851  }
852  else
853  {
854  statusPermute2 = arm_compute::CLPermute::validate(concatInputsTensorInfosPtr[0],
855  &aclOutputInfo,
856  arm_compute::PermutationVector(0U, 2U, 1U));
857  }
858  }
859 
860  auto okCode = arm_compute::ErrorCode::OK;
861  if (statusPermute1.error_code() == okCode &&
862  statusSplit.error_code() == okCode &&
863  statusLSTM .error_code() == okCode &&
864  statusConcat.error_code() == okCode &&
865  statusPermute2.error_code() == okCode)
866  {
867  return arm_compute::Status(arm_compute::ErrorCode::OK,
868  "All Unidirectional Sequence LSTM layer validate status OK.");
869  }
870  else
871  {
872  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
873  "Unidirectional Sequence LSTM layer validate status failed.");
874  }
875 }
void IgnoreUnused(Ts &&...)
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunction)
Status
enumeration
Definition: Types.hpp:42
armnn::TensorShape GetTensorShape(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout)
Definition: TensorUtils.cpp:19

◆ Combine() [1/2]

MemorySourceFlags armnn::Combine ( Arg  sourceA,
Arg  sourceB 
)

Definition at line 30 of file MemorySources.hpp.

Referenced by Combine().

31 {
32  return static_cast<MemorySourceFlags>(sourceA) | static_cast<MemorySourceFlags>(sourceB);
33 }
unsigned int MemorySourceFlags

◆ Combine() [2/2]

MemorySourceFlags armnn::Combine ( Arg  source,
Args...  rest 
)

Definition at line 36 of file MemorySources.hpp.

References Combine().

37 {
38  return static_cast<MemorySourceFlags>(source) | Combine(rest...);
39 }
MemorySourceFlags Combine(Arg source, Args... rest)
unsigned int MemorySourceFlags
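A minimal sketch of how the two overloads compose; the specific MemorySource values are only an example:

// Illustrative only: OR three MemorySource values into one MemorySourceFlags bitmask.
armnn::MemorySourceFlags flags = armnn::Combine(armnn::MemorySource::Malloc,
                                                armnn::MemorySource::DmaBuf,
                                                armnn::MemorySource::DmaBufProtected);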

◆ ComputeAclAxis()

int armnn::ComputeAclAxis ( const int &  armnnAxis,
const armnn::TensorInfo tensor 
)
inline

Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank)

Definition at line 264 of file ArmComputeUtils.hpp.

References ARMNN_ASSERT, and TensorInfo::GetNumDimensions().

Referenced by ClGatherWorkload::ClGatherWorkload(), ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(), ClSoftmaxWorkload::ClSoftmaxWorkload(), NeonGatherWorkload::NeonGatherWorkload(), NeonLogSoftmaxWorkload::NeonLogSoftmaxWorkload(), and NeonSoftmaxWorkload::NeonSoftmaxWorkload().

265 {
266  int rank = static_cast<int>(tensor.GetNumDimensions());
267 
268  ARMNN_ASSERT(rank != 0);
269  ARMNN_ASSERT((-1 * rank) <= armnnAxis);
270  ARMNN_ASSERT(armnnAxis < rank);
271 
272  int sign = (armnnAxis < 0) ? -1 : 1;
273  int aclAxis = sign * rank - 1 - armnnAxis;
274 
275  return aclAxis;
276 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
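A short worked example of the conversion above (the rank-4 tensor is an assumption for illustration):

// Illustrative only: for a rank-4 tensor, aclAxis = sign * rank - 1 - armnnAxis.
//   armnnAxis =  0  ->  1 * 4 - 1 - 0    =  3   (leftmost ArmNN dim is the rightmost ACL dim)
//   armnnAxis =  1  ->  1 * 4 - 1 - 1    =  2
//   armnnAxis = -1  -> -1 * 4 - 1 - (-1) = -4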

◆ ComputeConv3DInfo() [1/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo ( const armnn::Convolution3dDescriptor  descriptor,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)
inline

Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor.

Definition at line 293 of file ArmComputeUtils.hpp.

References ConvertActivationDescriptorToAclActivationLayerInfo(), Convolution3dDescriptor::m_DilationX, Convolution3dDescriptor::m_DilationY, Convolution3dDescriptor::m_DilationZ, Convolution3dDescriptor::m_PadBack, Convolution3dDescriptor::m_PadBottom, Convolution3dDescriptor::m_PadFront, Convolution3dDescriptor::m_PadLeft, Convolution3dDescriptor::m_PadRight, Convolution3dDescriptor::m_PadTop, Convolution3dDescriptor::m_StrideX, Convolution3dDescriptor::m_StrideY, and Convolution3dDescriptor::m_StrideZ.

296 {
297  const arm_compute::Size3D stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
298  const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
299  descriptor.m_PadTop, descriptor.m_PadBottom,
300  descriptor.m_PadFront, descriptor.m_PadBack};
301  const arm_compute::Size3D dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
302 
303  const arm_compute::ActivationLayerInfo activationInfo =
304  ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
305  const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
306 
307  return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
308 }
uint32_t m_PadBack
Padding back value in the depth dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_PadBottom
Padding bottom value in the height dimension.
uint32_t m_DilationX
Dilation along x axis.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
uint32_t m_PadFront
Padding front value in the depth dimension.
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_PadRight
Padding right value in the width dimension.
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)
uint32_t m_PadTop
Padding top value in the height dimension.
uint32_t m_DilationZ
Dilation along z axis.
uint32_t m_StrideZ
Stride value when proceeding through input for the depth dimension.
uint32_t m_DilationY
Dilation along y axis.

◆ ComputeConv3DInfo() [2/2]

arm_compute::Conv3dInfo armnn::ComputeConv3DInfo ( const armnn::Convolution3dQueueDescriptor  queueDescriptor,
bool  isFastMathEnabled 
)
inline

Definition at line 310 of file ArmComputeUtils.hpp.

References ConvertAdditionalInfoToAclActivationLayerInfo(), QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Convolution3dDescriptor::m_StrideX.

312 {
313  auto descriptor = queueDescriptor.m_Parameters;
314  const arm_compute::Size3D stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
315  const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
316  descriptor.m_PadTop, descriptor.m_PadBottom,
317  descriptor.m_PadFront, descriptor.m_PadBack};
318  const arm_compute::Size3D dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};
319 
320  const arm_compute::ActivationLayerInfo activationInfo =
321  ConvertAdditionalInfoToAclActivationLayerInfo(queueDescriptor);
322  const auto roundType = arm_compute::DimensionRoundingType::FLOOR;
323 
324  return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
325 }
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor &queueDescriptor)

◆ ComputePositiveAxis()

unsigned int armnn::ComputePositiveAxis ( const int &  axis,
const armnn::TensorInfo tensor 
)
inline

Function to convert axis to its positive equivalent value.

[-rank, rank) --> [0, rank)

Definition at line 280 of file ArmComputeUtils.hpp.

References ARMNN_ASSERT, and TensorInfo::GetNumDimensions().

281 {
282  int rank = static_cast<int>(tensor.GetNumDimensions());
283 
284  ARMNN_ASSERT(rank != 0);
285  ARMNN_ASSERT((-1 * rank) <= axis);
286  ARMNN_ASSERT(axis < rank);
287 
288  int positiveAxis = (axis < 0) ? rank + axis : axis;
289  return static_cast<unsigned int>(positiveAxis);
290 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
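For example, for an assumed rank-4 tensor:

// Illustrative only: mapping [-rank, rank) to [0, rank) for rank = 4.
//   axis =  2  ->  2
//   axis = -1  ->  4 + (-1) = 3
//   axis = -4  ->  4 + (-4) = 0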

◆ ComputeReductionTensorShape()

const TensorInfo armnn::ComputeReductionTensorShape ( const armnn::TensorInfo input,
const std::vector< uint32_t > &  vAxis,
const bool  keepDims 
)
inline

Function to compute the output tensor shape based on the axes and if keepDims is set.

Definition at line 352 of file ArmComputeUtils.hpp.

References TensorInfo::GetNumDimensions(), and numeric_cast().

Referenced by ChainReduceLayers().

355 {
356  auto reducedTensorInfo = input;
357  unsigned int rank = reducedTensorInfo.GetNumDimensions();
358  unsigned int outputRank = 0;
359  // Calculate output dimension
360  if (keepDims)
361  {
362  outputRank = rank;
363  }
364  else if (vAxis.empty())
365  {
366  outputRank = 1;
367  }
368  else if (vAxis.size() > reducedTensorInfo.GetNumDimensions())
369  {
370  throw LayerValidationException("ReduceLayer: Dimensions to reduce can not be bigger than input dimensions");
371  }
372  else
373  {
374  outputRank = reducedTensorInfo.GetNumDimensions() - armnn::numeric_cast<unsigned int>(vAxis.size());
375  if (outputRank == 0)
376  {
377  outputRank = 1;
378  }
379  }
380  std::vector<unsigned int> dimSizes(outputRank, 1);
381  if (!vAxis.empty())
382  {
383  // Skip the dimension that has been reduced unless keepDims is true.
384  unsigned int outputIndex = 0;
385  for (unsigned int i = 0; i < reducedTensorInfo.GetNumDimensions(); ++i)
386  {
387  if (std::find(vAxis.begin(), vAxis.end(), i) == vAxis.end())
388  {
389  dimSizes[outputIndex] = armnn::numeric_cast<unsigned int>(reducedTensorInfo.GetShape()[i]);
390  ++outputIndex;
391  }
392  else if (keepDims)
393  {
394  dimSizes[outputIndex] = 1;
395  ++outputIndex;
396  }
397  }
398  }
399  const TensorShape inferredShape = TensorShape(outputRank, dimSizes.data());
400  reducedTensorInfo.SetShape(inferredShape);
401  return reducedTensorInfo;
402 }
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
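A short worked example of the shape inference (the input shape is chosen purely for illustration):

// Illustrative only: input shape [2, 3, 4], vAxis = {1}.
//   keepDims = true  -> output shape [2, 1, 4]  (reduced axis kept as size 1)
//   keepDims = false -> output shape [2, 4]     (reduced axis removed)
//   vAxis empty, keepDims = false -> output shape [1] (all dimensions reduced)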

◆ ComputeSoftmaxAclAxis()

T armnn::ComputeSoftmaxAclAxis ( const SoftmaxDescriptor softmaxDesc,
const armnn::TensorInfo tensor 
)
inline

Definition at line 225 of file ArmComputeUtils.hpp.

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), and SoftmaxDescriptor::m_Axis.

226 {
227  // Detect the Android default value of -1 and return the ACL default value of 0.
228  if (softmaxDesc.m_Axis == -1)
229  {
230  return 0;
231  }
232 
233  unsigned int dim = tensor.GetNumDimensions();
234 
235  ARMNN_ASSERT(dim != 0);
236 
237  // Currently ArmNN supports axis 1.
238  auto aclAxis = (static_cast<T>(dim) - 1);
239  aclAxis = aclAxis > 0 ? aclAxis -1 : aclAxis;
240 
241  return aclAxis;
242 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
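Worked examples of the mapping implemented above (the tensor ranks are illustrative):

// Illustrative only:
//   m_Axis == -1            -> returns 0 (Android default maps to the ACL default)
//   m_Axis != -1, rank == 2 -> aclAxis = (2 - 1) = 1, then 1 > 0 so aclAxis = 0
//   m_Axis != -1, rank == 4 -> aclAxis = (4 - 1) = 3, then 3 > 0 so aclAxis = 2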

◆ ComputeSplitAxis()

std::set<unsigned int> armnn::ComputeSplitAxis ( const armnn::SplitterDescriptor desc,
const TensorShape input 
)
inline

Definition at line 244 of file ArmComputeUtils.hpp.

References ViewsDescriptor::GetNumDimensions(), ViewsDescriptor::GetNumViews(), and ViewsDescriptor::GetViewSizes().

Referenced by ClSplitterWorkload::ClSplitterWorkload(), SplitterLayer::CreateWorkload(), ClLayerSupport::IsSplitterSupported(), NeonLayerSupport::IsSplitterSupported(), and NeonSplitterWorkload::NeonSplitterWorkload().

245 {
246  unsigned int numSplit = desc.GetNumViews();
247  unsigned int numDimensions = desc.GetNumDimensions();
248  std::set<unsigned int> splitAxis;
249 
250  for (unsigned int i = 0; i < numSplit; ++i)
251  {
252  for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
253  {
254  if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
255  {
256  splitAxis.insert(dimIdx);
257  }
258  }
259  }
260  return splitAxis;
261 }
uint32_t GetNumDimensions() const
Get the number of dimensions.
uint32_t GetNumViews() const
Get the number of views.
const uint32_t * GetViewSizes(uint32_t idx) const
Get the view sizes at the int value idx.
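A minimal sketch (the view sizes and input shape are illustrative assumptions, not from the ArmNN sources) showing how the split axis is inferred by comparing each view's sizes against the input shape:

// Illustrative only: split a [4, 6] tensor into two [4, 3] views.
armnn::SplitterDescriptor splitDesc(2, 2);      // 2 views, 2 dimensions
for (unsigned int view = 0; view < 2; ++view)
{
    splitDesc.SetViewSize(view, 0, 4);          // dimension 0 matches the input size
    splitDesc.SetViewSize(view, 1, 3);          // dimension 1 differs -> split axis
}

armnn::TensorShape inputShape({4, 6});
std::set<unsigned int> axes = armnn::ComputeSplitAxis(splitDesc, inputShape);  // expected: {1}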

◆ Concatenate()

void Concatenate ( const ConcatQueueDescriptor data,
std::vector< ITensorHandle *>  inputs,
std::vector< ITensorHandle *>  outputs 
)

Definition at line 14 of file Concatenate.cpp.

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), GetTensorInfo(), ConcatQueueDescriptor::ViewOrigin::m_Origin, ConcatQueueDescriptor::m_ViewOrigins, and MaxNumOfTensorDimensions.

Referenced by RefConcatWorkload::ExecuteAsync().

17 {
18  const TensorInfo& outputInfo0 = GetTensorInfo(outputs[0]);
19 
20  std::unique_ptr<Encoder<float>> encoderPtr = MakeEncoder<float>(outputInfo0, outputs[0]->Map());
21  Encoder<float>& encoder = *encoderPtr;
22 
23  for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
24  {
25  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
26 
27  unsigned int indexRemainder = index;
28  unsigned int dimensionStride = outputInfo0.GetNumElements();
29 
30  for (unsigned int i = 0; i < outputInfo0.GetNumDimensions(); i++)
31  {
32  dimensionStride /= outputInfo0.GetShape()[i];
33  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
34  indexRemainder -= indices[i] * dimensionStride;
35  }
36 
37  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
38  {
39  ConcatQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
40 
41  //Split view extents are defined by the size of (the corresponding) input tensor.
42  const TensorInfo& inputInfo = GetTensorInfo(inputs[viewIdx]);
43  ARMNN_ASSERT(inputInfo.GetNumDimensions() == outputInfo0.GetNumDimensions());
44 
45  // Check all dimensions to see if this element is inside the given input view.
46  bool insideView = true;
47  for (unsigned int i = 0; i < inputInfo.GetNumDimensions(); i++)
48  {
49  if (indices[i] < view.m_Origin[i])
50  {
51  insideView = false;
52  }
53  if (indices[i] >= view.m_Origin[i] + inputInfo.GetShape()[i])
54  {
55  insideView = false;
56  }
57  }
58 
59  if (insideView)
60  {
61  std::unique_ptr<Decoder<float>> decoderPtr =
62  MakeDecoder<float>(inputInfo,inputs[viewIdx]->Map());
63  Decoder<float>& decoder = *decoderPtr;
64  unsigned int inIndex = 0;
65  unsigned int dimensionStride = 1;
66 
67  for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;)
68  {
69  inIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
70  dimensionStride *= inputInfo.GetShape()[i];
71  }
72  decoder += inIndex;
73  encoder.Set(decoder.Get());
74 
75  //What should we do if input views overlap on the output tensor?
76  //We could error, take the average, or something else...
77  //For now just stop after finding first view (input) that matches.
78  break;
79  }
80  }
81  ++encoder;
82  }
83 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout, const armnn::DataType dataType)
Definition: TensorUtils.cpp:38
constexpr unsigned int MaxNumOfTensorDimensions
Definition: Types.hpp:31

◆ ConditionalThrow() [1/2]

void armnn::ConditionalThrow ( bool  condition,
const std::string &  message 
)

Definition at line 165 of file Exceptions.hpp.

166 {
167  if (!condition)
168  {
169  throw ExceptionType(message);
170  }
171 }

◆ ConditionalThrow() [2/2]

void armnn::ConditionalThrow ( bool  condition)

Definition at line 174 of file Exceptions.hpp.

175 {
176  if (!condition)
177  {
178  throw ExceptionType();
179  }
180 }

◆ ConditionalThrowIfNotEqual()

void armnn::ConditionalThrowIfNotEqual ( const std::string &  message,
const ComparedType &  leftHandSide,
const ComparedType &  rightHandSide 
)

ComparedType must support: operator==(const ComparedType&) operator<<(ostream&, const ComparedType&)

Definition at line 189 of file Exceptions.hpp.

192 {
193  if (!(leftHandSide == rightHandSide))
194  {
195  std::stringstream ss;
196  ss << message << " : " << leftHandSide << " != " << rightHandSide;
197  throw ExceptionType(ss.str());
198  }
199 }
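A minimal usage sketch; the exception type and the compared values here are illustrative assumptions:

// Illustrative only: throw InvalidArgumentException if the two ranks differ.
unsigned int inputRank  = 4;
unsigned int outputRank = 3;
armnn::ConditionalThrowIfNotEqual<armnn::InvalidArgumentException>(
    "Input and output ranks must match", inputRank, outputRank);
// Throws with the message: "Input and output ranks must match : 4 != 3"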

◆ ConfigureDetailsObject()

void armnn::ConfigureDetailsObject ( JsonChildObject detailsObject,
std::string  layerDetailsStr 
)

Definition at line 295 of file Profiling.cpp.

References ExecObjectDesc, JsonChildObject::SetAndParseDetails(), and JsonChildObject::SetType().

297 {
298  detailsObject.SetType(JsonObjectType::ExecObjectDesc);
299  detailsObject.SetAndParseDetails(layerDetailsStr);
300 
301 }

◆ ConfigureLogging()

void ConfigureLogging ( bool  printToStandardOutput,
bool  printToDebugOutput,
LogSeverity  severity 
)

Configures the logging behaviour of the ARMNN library.

printToStandardOutput: Set to true if log messages should be printed to the standard output.
printToDebugOutput: Set to true if log messages should be printed to a platform-specific debug output (where supported).
severity: All log messages that are at this severity level or higher will be printed; others will be ignored.

Examples:
AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, and SimpleSample.cpp.

Definition at line 18 of file Utils.cpp.

References SetAllLoggingSinks(), SetLogFilter(), and Trace.

Referenced by ConfigureLoggingTest(), ProfilingServiceRuntimeHelper::ForceTransitionToState(), armnn::test::InferenceTestMain(), and main().

19 {
20  SetAllLoggingSinks(printToStandardOutput, printToDebugOutput, false);
21  SetLogFilter(severity);
22 }
void SetAllLoggingSinks(bool standardOut, bool debugOut, bool coloured)
Definition: Logging.cpp:191
void SetLogFilter(LogSeverity level)
Definition: Logging.cpp:73
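A typical call from an application's entry point might look like the following; the chosen severity is just an example:

// Illustrative only: print Info and above to standard output, skip the debug sink.
armnn::ConfigureLogging(true, false, armnn::LogSeverity::Info);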

◆ ConfigureTuner()

void armnn::ConfigureTuner ( arm_compute::CLTuner &  tuner,
TuningLevel  level 
)

Definition at line 115 of file ClBackendContext.cpp.

References ARMNN_LOG, Exhaustive, info, None, Normal, and Rapid.

Referenced by ClBackendContext::ClBackendContext().

116 {
117  tuner.set_tune_new_kernels(true); // Turn on tuning initially.
118 
119  switch (level)
120  {
121  case TuningLevel::Rapid:
122  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)";
123  tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID);
124  break;
125  case TuningLevel::Normal:
126  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)";
127  tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL);
128  break;
129  case TuningLevel::Exhaustive:
130  ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)";
131  tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE);
132  break;
133  case TuningLevel::None:
134  default:
135  tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode.
136  break;
137  }
138 }
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205

◆ Convert1HWOTensorInfoToAcl()

std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl ( const TensorInfo weightInfo,
const TensorInfo inputInfo,
const DataLayout  dataLayout 
)

Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a TensorInfo from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library. Returns a tuple of the converted weights tensor info and the depth multiplier.

Definition at line 170 of file WorkloadUtils.cpp.

References GetDataLayoutName(), TensorInfo::GetShape(), NCHW, NHWC, and armnnUtils::Permuted().

Referenced by GatherTensorHandlePairs().

173 {
174  unsigned int aclDepthMultiplier = 1;
175  TensorInfo weightsPermuted;
176  if (dataLayout == armnn::DataLayout::NHWC)
177  {
178  // No permutation required. Input and weights data layouts are the same.
179  aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[3];
180  weightsPermuted = weightInfo;
181  }
182 
183  else if (dataLayout == armnn::DataLayout::NCHW)
184  {
185  // Weights permutation required. Weights [N,H,W,C] and input [N,C,H,W] data layouts are different.
186  // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
187  aclDepthMultiplier = weightInfo.GetShape()[3] / inputInfo.GetShape()[1];
188  PermutationVector permutationVector{ 0, 2, 3, 1 };
189  weightsPermuted = armnnUtils::Permuted(weightInfo, permutationVector);
190  }
191  else
192  {
193  throw InvalidArgumentException(fmt::format("Unknown data layout for tensor info conversion: {}",
194  GetDataLayoutName(dataLayout)));
195  }
196 
197  return std::make_tuple(weightsPermuted, aclDepthMultiplier);
198 }
constexpr const char * GetDataLayoutName(DataLayout dataLayout)
Definition: TypesUtils.hpp:222
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:98
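A minimal sketch of the NCHW case; the concrete shapes are illustrative assumptions:

// Illustrative only: depthwise weights [1, H, W, I*M] = [1, 3, 3, 8], NCHW input with 2 channels.
armnn::TensorInfo weightInfo({1, 3, 3, 8}, armnn::DataType::Float32);
armnn::TensorInfo inputInfo({1, 2, 5, 5}, armnn::DataType::Float32);   // [N, C, H, W]

auto result = armnn::Convert1HWOTensorInfoToAcl(weightInfo, inputInfo, armnn::DataLayout::NCHW);
// std::get<0>(result) has shape [1, 8, 3, 3]  ([1, I*M, H, W])
// std::get<1>(result) == 4                    (depth multiplier = 8 / 2)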

◆ Convert1HWOTensorToAcl()

std::tuple< ConstTensor, unsigned int > Convert1HWOTensorToAcl ( const ConstTensorHandle weightTensor,
const TensorInfo inputInfo,
const DataLayout  dataLayout,
void *  permuteBuffer 
)

Weights for depthwise have a data layout of [1,H,W,O] = [1,H,W,I*M]. This function converts a ConstTensorHandle from [1,H,W,I*M] to [1,I*M,H,W] (if NCHW) or keeps it at [1,H,W,I*M] (if NHWC), as required by the compute library.

Parameters
weightTensor- ConstTensorHandle of weights tensor
inputInfo- TensorInfo of input tensor
dataLayout- DataLayout of the input tensor
permuteBuffer- Pointer to memory with the size of tensor. Used for the permutation
Returns
A tuple of the transformed weights ConstTensor and the depthwise multiplier

Definition at line 139 of file WorkloadUtils.cpp.

References GetDataLayoutName(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, and PermuteTensor().

Referenced by GatherTensorHandlePairs().

143 {
144  TensorInfo weightsInfo = weightTensor->GetTensorInfo();
145  unsigned int depthMultiplier = 1;
146  PermutationVector permutationVector{};
147  if (dataLayout == armnn::DataLayout::NHWC)
148  {
149  // No permutation required. Data layouts are the same.
150 
151  depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[3];
152  }
153  else if (dataLayout == armnn::DataLayout::NCHW)
154  {
155  // [ 1, H, W, I*M] --> [ 1, I * M, H, W ]
156  depthMultiplier = weightsInfo.GetShape()[3] / inputInfo.GetShape()[1];
157  permutationVector = { 0, 2, 3, 1 };
158  }
159  else
160  {
161  throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
162  GetDataLayoutName(dataLayout)));
163  }
164 
165  ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
166 
167  return std::make_tuple(weightsPermuted, depthMultiplier);
168 }
armnn::ConstTensor PermuteTensor(const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
constexpr const char * GetDataLayoutName(DataLayout dataLayout)
Definition: TypesUtils.hpp:222

◆ Convert1HWOtoMIHW()

std::tuple< ConstTensor, unsigned int > Convert1HWOtoMIHW ( const ConstTensorHandle weightTensor,
const TensorInfo inputInfo,
const DataLayout dataLayout,
void *  permuteBuffer 
)

Converts a (weights) tensor from [1, H, W, I*M] = [1, H, W, O] to [M, I, H, W].

Parameters
weightTensor - ConstTensorHandle of the weight tensor that should be converted
inputInfo - TensorInfo of the corresponding input tensor
dataLayout - DataLayout of the input tensor, e.g. NHWC or NCHW
permuteBuffer - Memory location with the same size as the weight tensor, to which the converted data is written
Returns
A tuple of the converted weight ConstTensor and the depth multiplier

Definition at line 201 of file WorkloadUtils.cpp.

References DataLayoutIndexed::GetChannelsIndex(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), TensorInfo::HasPerAxisQuantization(), PermuteTensor(), and TensorInfo::SetShape().

Referenced by GatherTensorHandlePairs().

205 {
206  TensorInfo weightsInfo = weightTensor->GetTensorInfo();
207 
208  if (weightsInfo.HasPerAxisQuantization())
209  {
210  throw InvalidArgumentException("Can't convert tensor from [1,H,W,Cout] to [M,Cin,H,W] when per channel "
211  "quantization is applied.");
212  }
213 
214  // Reshape weights [ 1, H, W, I*M ] --> [ H, W, I, M ]
215  auto weightsShape = weightsInfo.GetShape();
216  auto channelIndex = armnnUtils::DataLayoutIndexed(dataLayout).GetChannelsIndex();
217  unsigned int depthMultiplier = weightsShape[3] / inputInfo.GetShape()[channelIndex];
218  weightsInfo.SetShape({ weightsShape[1],
219  weightsShape[2],
220  inputInfo.GetShape()[channelIndex],
221  depthMultiplier});
222 
223  // Permute [ H, W, I, M ] --> [ M, I, H, W ]
224  PermutationVector permutationVector = { 2, 3, 1, 0 };
225  ConstTensor weightsPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
226 
227  return std::make_tuple(weightsPermuted, depthMultiplier);
228 }
armnn::ConstTensor PermuteTensor(const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
unsigned int GetChannelsIndex() const
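
As a worked example (not from the ArmNN sources): a [1,3,3,16] weight tensor paired with an 8-channel input is first reshaped to [3,3,8,2], i.e. [H,W,I,M] with a depth multiplier of 16 / 8 = 2, and then permuted with { 2, 3, 1, 0 } to give [2,8,3,3], i.e. [M,I,H,W].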

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [1/2]

arm_compute::ActivationLayerInfo armnn::ConvertActivationDescriptorToAclActivationLayerInfo ( const ActivationDescriptor actDesc)
inline

◆ ConvertActivationDescriptorToAclActivationLayerInfo() [2/2]

arm_compute::ActivationLayerInfo armnn::ConvertActivationDescriptorToAclActivationLayerInfo ( const ActivationDescriptor activationDescPtr)
inline

Definition at line 92 of file ArmComputeUtils.hpp.

References ConvertActivationDescriptorToAclActivationLayerInfo().

93 {
94  if (activationDescPtr != nullptr)
95  {
96  return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
97  *activationDescPtr));
98  }
99  return arm_compute::ActivationLayerInfo();
100 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)
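
A minimal usage sketch (not from the ArmNN sources), assuming an ActivationDescriptor configured for BoundedReLu with an upper bound of 6:

 armnn::ActivationDescriptor actDesc;
 actDesc.m_Function = armnn::ActivationFunction::BoundedReLu;
 actDesc.m_A = 6.0f; // upper bound
 actDesc.m_B = 0.0f; // lower bound

 arm_compute::ActivationLayerInfo aclActivation =
     armnn::ConvertActivationDescriptorToAclActivationLayerInfo(&actDesc);
 // Expected to map to ActivationFunction::LU_BOUNDED_RELU with a() == 6.0f.
 // Passing nullptr instead returns a default-constructed (disabled) ActivationLayerInfo.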

◆ ConvertActivationFunctionToAclActivationFunction()

arm_compute::ActivationLayerInfo::ActivationFunction armnn::ConvertActivationFunctionToAclActivationFunction ( ActivationFunction  armnnFunction)
inline

Definition at line 61 of file ArmComputeUtils.hpp.

References Abs, BoundedReLu, Elu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by ConvertActivationDescriptorToAclActivationLayerInfo().

62 {
63  using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction;
64 
65  switch (armnnFunction)
66  {
67  case ActivationFunction::Linear: return AclActivationFunction::LINEAR;
68  // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function.
69  case ActivationFunction::Sigmoid: return AclActivationFunction::LOGISTIC;
70  case ActivationFunction::ReLu: return AclActivationFunction::RELU;
71  case ActivationFunction::BoundedReLu: return AclActivationFunction::LU_BOUNDED_RELU;
72  case ActivationFunction::SoftReLu: return AclActivationFunction::SOFT_RELU;
73  case ActivationFunction::LeakyReLu: return AclActivationFunction::LEAKY_RELU;
74  case ActivationFunction::Abs: return AclActivationFunction::ABS;
75  case ActivationFunction::Sqrt: return AclActivationFunction::SQRT;
76  case ActivationFunction::Square: return AclActivationFunction::SQUARE;
77  case ActivationFunction::TanH: return AclActivationFunction::TANH;
78  case ActivationFunction::Elu: return AclActivationFunction::ELU;
79  case ActivationFunction::HardSwish: return AclActivationFunction::HARD_SWISH;
80  default: throw InvalidArgumentException("Unsupported activation function");
81  }
82 }
ActivationFunction
Definition: Types.hpp:86

◆ ConvertAdditionalInfoToAclActivationLayerInfo()

arm_compute::ActivationLayerInfo armnn::ConvertAdditionalInfoToAclActivationLayerInfo ( const QueueDescriptor queueDescriptor)
inline

Definition at line 103 of file ArmComputeUtils.hpp.

References ConvertActivationDescriptorToAclActivationLayerInfo(), and QueueDescriptor::GetAdditionalInformation().

Referenced by ClAdditionWorkload::ClAdditionWorkload(), ClDivisionWorkload::ClDivisionWorkload(), ClFullyConnectedWorkload::ClFullyConnectedWorkload(), ClMultiplicationWorkload::ClMultiplicationWorkload(), ClSubtractionWorkload::ClSubtractionWorkload(), ComputeConv3DInfo(), NeonAdditionWorkload::NeonAdditionWorkload(), NeonDivisionWorkload::NeonDivisionWorkload(), NeonMultiplicationWorkload::NeonMultiplicationWorkload(), and NeonSubtractionWorkload::NeonSubtractionWorkload().

104 {
105  const ActivationDescriptor* activationDescPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();
106 
107  if (activationDescPtr != nullptr)
108  {
109  return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
110  *activationDescPtr));
111  }
112  return arm_compute::ActivationLayerInfo();
113 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)

◆ ConvertBf16ToFp32Weight()

LayerT* armnn::ConvertBf16ToFp32Weight ( Layer l)

Definition at line 631 of file Network.cpp.

References BFloat16, FloatingPointConverter::ConvertBFloat16ToFloat32(), Convolution2d, Float32, FullyConnected, TensorInfo::GetDataType(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), and info.

632 {
633  LayerT* layer = PolymorphicDowncast<LayerT*>(l);
634  if ((layer->GetType() == LayerType::Convolution2d || layer->GetType() == LayerType::FullyConnected)
635  && layer->m_Weight)
636  {
637  const TensorInfo& info = layer->m_Weight->GetTensorInfo();
638 
639  if (info.GetDataType() == DataType::BFloat16)
640  {
641  std::vector<float> newValues(info.GetNumElements());
642 
643  FloatingPointConverter::ConvertBFloat16ToFloat32(
644  layer->m_Weight->template GetConstTensor<armnn::BFloat16>(), info.GetNumElements(), newValues.data());
645 
646  TensorInfo newInfo(info.GetShape(), DataType::Float32);
647  ConstTensor newInput(newInfo, newValues);
648  layer->m_Weight.reset(new ScopedTensorHandle(newInput));
649  }
650  }
651  return layer;
652 }
static void ConvertBFloat16ToFloat32(const void *srcBFloat16Buffer, size_t numElements, float *dstFloat32Buffer)
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.

◆ ConvertComparisonOperationToAcl()

arm_compute::ComparisonOperation armnn::ConvertComparisonOperationToAcl ( const ComparisonDescriptor descriptor)
inline

Definition at line 139 of file ArmComputeUtils.hpp.

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, ComparisonDescriptor::m_Operation, and NotEqual.

Referenced by ClComparisonWorkload::ClComparisonWorkload(), and NeonComparisonWorkload::NeonComparisonWorkload().

140 {
141  switch (descriptor.m_Operation)
142  {
143  case ComparisonOperation::Greater: return arm_compute::ComparisonOperation::Greater;
144  case ComparisonOperation::GreaterOrEqual: return arm_compute::ComparisonOperation::GreaterEqual;
145  case ComparisonOperation::Less: return arm_compute::ComparisonOperation::Less;
146  case ComparisonOperation::LessOrEqual: return arm_compute::ComparisonOperation::LessEqual;
147  case ComparisonOperation::Equal: return arm_compute::ComparisonOperation::Equal;
148  case ComparisonOperation::NotEqual: return arm_compute::ComparisonOperation::NotEqual;
149  default: throw InvalidArgumentException("Unsupported comparison function");
150  }
151 }

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [1/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo ( const FullyConnectedDescriptor fullyConnectedDesc,
const ActivationDescriptor activationDesc 
)
inline

Definition at line 192 of file ArmComputeUtils.hpp.

References ConvertActivationDescriptorToAclActivationLayerInfo(), and FullyConnectedDescriptor::m_TransposeWeightMatrix.

Referenced by ClFullyConnectedWorkload::ClFullyConnectedWorkload().

194 {
195  arm_compute::FullyConnectedLayerInfo fc_info;
196  fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
197  fc_info.activation_info = ConvertActivationDescriptorToAclActivationLayerInfo(activationDesc);
198  return fc_info;
199 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor *activationDescPtr)

◆ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo() [2/2]

arm_compute::FullyConnectedLayerInfo armnn::ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo ( const FullyConnectedDescriptor fullyConnectedDesc,
arm_compute::ActivationLayerInfo  activationLayerInfo 
)
inline

Definition at line 202 of file ArmComputeUtils.hpp.

References FullyConnectedDescriptor::m_TransposeWeightMatrix.

204 {
205  arm_compute::FullyConnectedLayerInfo fc_info;
206  fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
207  fc_info.activation_info = activationLayerInfo;
208  return fc_info;
209 }

◆ ConvertLogSeverity()

constexpr LogSeverity armnn::ConvertLogSeverity ( BoostLogSeverityMapping  severity)

Definition at line 199 of file Logging.hpp.

200 {
201  return static_cast<LogSeverity>(severity);
202 }
LogSeverity
Definition: Utils.hpp:14

◆ ConvertLstmActivationFuncToAclLayerInfo()

arm_compute::ActivationLayerInfo armnn::ConvertLstmActivationFuncToAclLayerInfo ( uint32_t  activationFunction)
inline

Definition at line 116 of file ArmComputeUtils.hpp.

117 {
118  // For preparing the object for the class ActivationLayerInfo, we need to consider 5 situations.
119  switch (activationFunction)
120  {
121  case 0:
122  return arm_compute::ActivationLayerInfo(); // no activation, do nothing
123  case 1:
124  return arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
125  case 3:
126  return arm_compute::ActivationLayerInfo(
127  arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
128  case 4:
129  return arm_compute::ActivationLayerInfo(
130  arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
131  case 6:
132  return arm_compute::ActivationLayerInfo(
133  arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
134  default:
135  throw armnn::Exception("Wrong Type of Activation Function!");
136  }
137 }
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
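
In other words, the accepted integer codes are 0 (no activation), 1 (ReLU), 3 (ReLU clipped at 6.0), 4 (TanH with scale 1.0) and 6 (Sigmoid); any other value, including 2 and 5, throws an armnn::Exception.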

◆ ConvertMaskToACLFormat()

int32_t ConvertMaskToACLFormat ( int32_t  mask,
int32_t  numDim 
)

Definition at line 286 of file WorkloadUtils.cpp.

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload(), GatherTensorHandlePairs(), and NeonStridedSliceWorkload::NeonStridedSliceWorkload().

287 {
288  int32_t reversedMask = 0;
289  for (unsigned int i = 0; i < armnn::numeric_cast<unsigned int>(numDim); ++i)
290  {
291  // Check if bit set in mask for each dimension
292  int32_t bit = (mask & 1 << i) != 0;
293  // Increment the new mask with the bits reversed
294  reversedMask += (bit << std::max(numDim-(armnn::numeric_cast<int>(i)+1), 0));
295  }
296 
297  return reversedMask;
298 }
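
A worked example (not from the ArmNN sources): for a 4-dimensional strided slice, a mask with bits 0 and 1 set is mirrored so that bit i ends up at position numDim - 1 - i:

 int32_t aclMask = armnn::ConvertMaskToACLFormat(0b0011, 4);
 // aclMask == 0b1100 (12); the mask is bit-reversed across the numDim dimensions,
 // matching the dimension ordering expected by the Compute Library.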

◆ ConvertNormalizationAlgorithmChannelToAclNormType()

arm_compute::NormType armnn::ConvertNormalizationAlgorithmChannelToAclNormType ( NormalizationAlgorithmChannel  channelType)
inline

Definition at line 180 of file ArmComputeUtils.hpp.

References Across, and Within.

181 {
182  using arm_compute::NormType;
183  switch (channelType)
184  {
185  case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP;
186  case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D;
187  default: throw InvalidArgumentException("Unsupported normalization algorithm channel type");
188  }
189 }

◆ ConvertOutputShapeRoundingToAclDimensionRoundingType()

arm_compute::DimensionRoundingType armnn::ConvertOutputShapeRoundingToAclDimensionRoundingType ( OutputShapeRounding  rounding)
inline

Definition at line 166 of file ArmComputeUtils.hpp.

References Ceiling, and Floor.

168 {
169  using arm_compute::DimensionRoundingType;
170 
171  switch (rounding)
172  {
173  case OutputShapeRounding::Ceiling: return DimensionRoundingType::CEIL;
174  case OutputShapeRounding::Floor: return DimensionRoundingType::FLOOR;
175  default: throw InvalidArgumentException("Unsupported Output Shape Rounding type");
176  }
177 }

◆ ConvertPaddingModeToAcl()

arm_compute::PaddingMode armnn::ConvertPaddingModeToAcl ( const PaddingMode paddingMode)
inline

Definition at line 327 of file ArmComputeUtils.hpp.

References Constant, Reflect, and Symmetric.

328 {
329  switch (paddingMode)
330  {
331  case PaddingMode::Constant: return arm_compute::PaddingMode::CONSTANT;
332  case PaddingMode::Reflect: return arm_compute::PaddingMode::REFLECT;
333  case PaddingMode::Symmetric: return arm_compute::PaddingMode::SYMMETRIC;
334  default: throw InvalidArgumentException("Unsupported Padding Mode");
335  }
336 }

◆ ConvertPoolingAlgorithmToAclPoolingType()

arm_compute::PoolingType armnn::ConvertPoolingAlgorithmToAclPoolingType ( PoolingAlgorithm  poolingAlgorithm)
inline

Definition at line 153 of file ArmComputeUtils.hpp.

References Average, L2, and Max.

154 {
155  using arm_compute::PoolingType;
156 
157  switch (poolingAlgorithm)
158  {
159  case PoolingAlgorithm::Max: return PoolingType::MAX;
160  case PoolingAlgorithm::Average: return PoolingType::AVG;
161  case PoolingAlgorithm::L2: return PoolingType::L2;
162  default: throw InvalidArgumentException("Unsupported pooling algorithm");
163  }
164 }

◆ ConvertReductionOperationToAcl()

arm_compute::ReductionOperation armnn::ConvertReductionOperationToAcl ( const ReduceDescriptor descriptor)
inline

Definition at line 338 of file ArmComputeUtils.hpp.

References ReduceDescriptor::m_ReduceOperation, Max, Mean, Min, Prod, and Sum.

339 {
340  switch (descriptor.m_ReduceOperation)
341  {
342  case ReduceOperation::Sum: return arm_compute::ReductionOperation::SUM;
343  case ReduceOperation::Mean: return arm_compute::ReductionOperation::MEAN_SUM;
344  case ReduceOperation::Max: return arm_compute::ReductionOperation::MAX;
345  case ReduceOperation::Min: return arm_compute::ReductionOperation::MIN;
346  case ReduceOperation::Prod: return arm_compute::ReductionOperation::PROD;
347  default: throw InvalidArgumentException("Unsupported Reduction operation");
348  }
349 }

◆ ConvertResizeMethodToAclInterpolationPolicy()

arm_compute::InterpolationPolicy armnn::ConvertResizeMethodToAclInterpolationPolicy ( ResizeMethod  resizeMethod)
inline

Definition at line 211 of file ArmComputeUtils.hpp.

References Bilinear, and NearestNeighbor.

212 {
213  switch (resizeMethod)
214  {
215  case ResizeMethod::Bilinear:
216  return arm_compute::InterpolationPolicy::BILINEAR;
217  case ResizeMethod::NearestNeighbor:
218  return arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR;
219  default:
220  throw InvalidArgumentException("Unsupported resize method");
221  }
222 }

◆ ConvertWeightTensorFromArmnnToAcl()

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl ( const ConstTensorHandle weightTensor,
DataLayout  dataLayout,
void *  permuteBuffer 
)

Definition at line 230 of file WorkloadUtils.cpp.

References ARMNN_ASSERT_MSG, Float16, Float32, BaseTensor< MemoryType >::GetDataType(), BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetShape(), ConstTensorHandle::GetTensorInfo(), NCHW, NHWC, PermuteTensor(), QAsymmS8, QAsymmU8, QSymmS8, and ReshapeWeightsForAcl().

Referenced by GatherTensorHandlePairs().

233 {
234  ARMNN_ASSERT_MSG(weightTensor, "Invalid input tensor");
235  ARMNN_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");
236 
237  auto multiplier = weightTensor->GetTensorInfo().GetShape()[0];
238  auto inputChannels = weightTensor->GetTensorInfo().GetShape()[1];
239 
240  // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
241  // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
242 
243  // 1. Permute the weights if necessary
244  // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
245  // starting from the current shape of [ M, I, H, W ]
246  // If no permutation is necessary, leave the permutation vector empty
247  PermutationVector permutationVector{};
248  if (dataLayout == DataLayout::NHWC)
249  {
250  // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
251  permutationVector = { 3, 2, 0, 1 };
252  }
253  ConstTensor weightPermuted = PermuteTensor(weightTensor, permutationVector, permuteBuffer);
254 
255  // Shuffle the weights data to obtain the channel order needed by ACL
256  if (multiplier > 1 && inputChannels > 1 && dataLayout == DataLayout::NCHW)
257  {
258  switch (weightPermuted.GetDataType())
259  {
260  case DataType::Float32:
261  weightPermuted = ReorderWeightChannelsForAcl<float>(weightPermuted, dataLayout, permuteBuffer);
262  break;
263  case DataType::Float16:
264  weightPermuted =
265  ReorderWeightChannelsForAcl<half_float::half>(weightPermuted, dataLayout, permuteBuffer);
266  break;
267  case DataType::QAsymmS8:
268  case DataType::QAsymmU8:
269  weightPermuted = ReorderWeightChannelsForAcl<uint8_t>(weightPermuted, dataLayout, permuteBuffer);
270  break;
271  case DataType::QSymmS8:
272  weightPermuted = ReorderWeightChannelsForAcl<int8_t>(weightPermuted, dataLayout, permuteBuffer);
273  break;
274  default:
275  break;
276  }
277  }
278 
279  // 2. Reshape the weights
280  ReshapeWeightsForAcl(weightPermuted.GetInfo(), dataLayout);
281 
282  // 3. Return both the tensor and the allocated storage to ensure that the data stays alive
283  return weightPermuted;
284 }
armnn::ConstTensor PermuteTensor(const ConstTensorHandle *tensor, const PermutationVector &permutationVector, void *permuteBuffer)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
void ReshapeWeightsForAcl(TensorInfo &weightInfo, DataLayout dataLayout)

◆ ConvertWeightTensorInfoFromArmnnToAcl()

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl ( const TensorInfo weightInfo,
DataLayout  dataLayout 
)

Definition at line 115 of file WorkloadUtils.cpp.

References NHWC, armnnUtils::Permuted(), and ReshapeWeightsForAcl().

Referenced by GatherTensorHandlePairs().

116 {
117  // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
118  // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
119 
120  // 1. Permute the weights if necessary
121  // If the data layout is NCHW no permutation is necessary, as a reshape to [ 1, I * M, H, W ] can be better done
122  // starting from the current shape of [ M, I, H, W ]
123  TensorInfo weightPermutedInfo(weightInfo);
124  if (dataLayout == DataLayout::NHWC)
125  {
126  // The data layout is NHWC, then permute the weights from [ M, I, H, W ] to [ H, W, I, M ]
127  PermutationVector permutationVector{ 3, 2, 0, 1 };
128  weightPermutedInfo = armnnUtils::Permuted(weightInfo, permutationVector);
129  }
130 
131  // 2. Reshape the weights
132  ReshapeWeightsForAcl(weightPermutedInfo, dataLayout);
133 
134  // 3. Return the permuted weight info
135  return weightPermutedInfo;
136 }
void ReshapeWeightsForAcl(TensorInfo &weightInfo, DataLayout dataLayout)
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:98

◆ Convolve()

void Convolve ( const TensorShape rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape rFilterShape,
Decoder< float > &  rFilterDecoder,
bool  biasEnabled,
Decoder< float > *  pBiasDecoder,
DataLayout  dataLayout,
unsigned int  paddingTop,
unsigned int  paddingLeft,
unsigned int  xStride,
unsigned int  yStride,
unsigned int  xDilation,
unsigned int  yDilation,
bool  depthwise 
)

Definition at line 71 of file ConvImpl.cpp.

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NHWC, and Encoder< IType >::Set().

Referenced by RefDepthwiseConvolution2dWorkload::ExecuteAsync(), and RefConvolution2dWorkload::ExecuteAsync().

87 {
88  if (biasEnabled && !pBiasDecoder)
89  {
90  throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
91  }
92  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
93 
94  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
95  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
96  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
97 
98  // Weights layout:
99  // Conv2d: [O,H,W,I]
100  // Depthwise: [1,H,W,O]
101  const unsigned int inputChannels = rInputShape[channelsIndex];
102  const unsigned int outputChannels = rOutputShape[channelsIndex];
103  const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
104 
105  const unsigned int batchSize = rOutputShape[0];
106  const unsigned int outputHeight = rOutputShape[heightIndex];
107  const unsigned int outputWidth = rOutputShape[widthIndex];
108  const unsigned int inputHeight = rInputShape[heightIndex];
109  const unsigned int inputWidth = rInputShape[widthIndex];
110 
111  const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
112  const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
113 
114  const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
115  const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape, depthwise);
116 
117  const TensorShape biasShape{outputChannels};
118  const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
119 
120  for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
121  {
122  for (unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
123  {
124  for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
125  {
126  for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
127  {
128  // This loop goes over each output element.
129  float sum = 0.0f;
130 
131  // For depthwise, each output channel corresponds to exactly one input channel.
132  // For normal, must loop over each input channel.
133  for (unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
134  {
135  for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
136  {
137  for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
138  {
139  // This loop goes over each input element for each output element.
140  unsigned int filterIndex = 0;
141 
142  // Since dimensionality of kernel depends on depthwiseness, so does index.
143  if (depthwise)
144  {
145  cInput = cOutput / depthMultiplier;
146  // filterDepth = outputChannels;
147  filterIndex = xFilter * outputChannels + cOutput +
148  yFilter * filterWidth * outputChannels;
149  }
150  else
151  {
152  // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
153  // performance regression.
154  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
155  {
156  filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
157  yFilter * filterWidth * inputChannels +
158  xFilter * inputChannels +
159  cInput;
160  }
161  else
162  {
163  filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
164  cInput * filterWidth * filterHeight +
165  yFilter * filterWidth +
166  xFilter;
167  }
168  }
169 
170  unsigned int yInput = yOutput * yStride + yFilter * yDilation;
171  unsigned int xInput = xOutput * xStride + xFilter * xDilation;
172 
173  float inputValue;
174 
175  // Check if we're in the padding.
176  if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
177  xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
178  {
179  inputValue = 0.0f;
180  }
181  else
182  {
183  unsigned int inputIndex = 0;
184 
185  // Keep this implementation, as using DataLayoutIndexed::GetIndex causes great
186  // performance regression.
187  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
188  {
189  inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
190  (yInput - paddingTop) * inputWidth * inputChannels +
191  (xInput - paddingLeft) * inputChannels +
192  cInput;
193  }
194  else
195  {
196  inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
197  inputWidth * inputHeight * cInput +
198  inputWidth * (yInput - paddingTop) +
199  xInput - paddingLeft;
200  }
201  inputValue = inputVec[inputIndex];
202  }
203 
204  sum += filterVec[filterIndex] * inputValue;
205  }
206  }
207  }
208 
209  if (biasEnabled)
210  {
211  sum += biasVec[cOutput];
212  }
213 
214  unsigned int outIdx;
215  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NHWC)
216  {
217  outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
218  yOutput * outputWidth * outputChannels +
219  xOutput * outputChannels +
220  cOutput;
221  }
222  else
223  {
224  outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
225  cOutput * outputHeight * outputWidth +
226  yOutput * outputWidth +
227  xOutput;
228  }
229 
230  rOutputEncoder[outIdx];
231  rOutputEncoder.Set(sum);
232  }
233  }
234  }
235  }
236 }
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
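
In the depthwise case each output channel maps to exactly one input channel: with 8 input channels and 16 output channels the depth multiplier is 2, and output channel cOutput reads from input channel cOutput / 2 while the [1,H,W,O] filter is indexed directly by cOutput.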

◆ Convolve3d()

void Convolve3d ( const TensorShape rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape rFilterShape,
Decoder< float > &  rFilterDecoder,
bool  biasEnabled,
Decoder< float > *  pBiasDecoder,
DataLayout  dataLayout,
unsigned int  paddingTop,
unsigned int  paddingLeft,
unsigned int  paddingFront,
unsigned int  xStride,
unsigned int  yStride,
unsigned int  zStride,
unsigned int  xDilation,
unsigned int  yDilation,
unsigned int  zDilation 
)

Definition at line 11 of file Conv3dImpl.cpp.

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetDepthIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), NDHWC, and Encoder< IType >::Set().

Referenced by RefConvolution3dWorkload::ExecuteAsync().

29 {
30  if (biasEnabled && !pBiasDecoder)
31  {
32  throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
33  }
34  const armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
35 
36  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
37  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
38  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
39  const unsigned int depthIndex = dataLayoutIndexed.GetDepthIndex();
40 
41  const unsigned int inChannels = rInputShape[channelsIndex];
42  const unsigned int outChannels = rOutputShape[channelsIndex];
43 
44  const unsigned int batchSize = rOutputShape[0];
45  const unsigned int outputHeight = rOutputShape[heightIndex];
46  const unsigned int outputWidth = rOutputShape[widthIndex];
47  const unsigned int outputDepth = rOutputShape[depthIndex];
48  const unsigned int inputHeight = rInputShape[heightIndex];
49  const unsigned int inputWidth = rInputShape[widthIndex];
50  const unsigned int inputDepth = rInputShape[depthIndex];
51 
52  // Conv3d weights layout: [D,H,W,I,O]
53  const unsigned int filterDepth = rFilterShape[0];
54  const unsigned int filterHeight = rFilterShape[1];
55  const unsigned int filterWidth = rFilterShape[2];
56 
57  const std::vector<float> inputVec = rInputDecoder.DecodeTensor(rInputShape);
58  const std::vector<float> filterVec = rFilterDecoder.DecodeTensor(rFilterShape);
59 
60  const TensorShape biasShape{outChannels};
61  const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
62 
63  for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
64  {
65  for (unsigned int zOutput = 0; zOutput < outputDepth; zOutput++)
66  {
67  for (unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
68  {
69  for (unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
70  {
71  for (unsigned int cOutput = 0; cOutput < outChannels; cOutput++)
72  {
73  // This loop goes over each output element.
74  float sum = 0.0f;
75 
76  // Loop over each input channel.
77  for (unsigned int zFilter = 0; zFilter < filterDepth; zFilter++)
78  {
79  for (unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
80  {
81  for (unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
82  {
83  for (unsigned int cInput = 0; cInput < inChannels; cInput++)
84  {
85  // This loop goes over each input element for each output element.
86  unsigned int filterIndex = 0;
87 
88  // Conv3d weights layout: [D,H,W,I,O]
89  // Keep this implementation, as using DataLayoutIndexed::GetIndex
90  // causes large performance regression.
91  filterIndex = zFilter * filterHeight * filterWidth * inChannels * outChannels +
92  yFilter * filterWidth * inChannels * outChannels +
93  xFilter * inChannels * outChannels +
94  cInput * outChannels +
95  cOutput;
96 
97  unsigned int yInput = yOutput * yStride + yFilter * yDilation;
98  unsigned int xInput = xOutput * xStride + xFilter * xDilation;
99  unsigned int zInput = zOutput * zStride + zFilter * zDilation;
100 
101  float inputValue;
102 
103  // Check if we're in the padding.
104  if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
105  xInput < paddingLeft || xInput >= inputWidth + paddingLeft ||
106  zInput < paddingFront || zInput >= inputDepth + paddingFront)
107  {
108  inputValue = 0.0f;
109  }
110  else
111  {
112  unsigned int inputIndex = 0;
113 
114  // Keep this implementation, as using DataLayoutIndexed::GetIndex
115  // causes large performance regression.
116  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
117  {
118  inputIndex =
119  batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
120  (zInput-paddingFront) * inputHeight * inputWidth * inChannels +
121  (yInput-paddingTop) * inputWidth * inChannels +
122  (xInput-paddingLeft) * inChannels +
123  cInput;
124  }
125  else
126  {
127  // NCDHW DataLayout
128  inputIndex =
129  batchIdx * inputDepth * inputHeight * inputWidth * inChannels +
130  inputDepth * inputHeight * inputWidth * cInput +
131  (zInput-paddingFront) * inputHeight * inputWidth +
132  (yInput-paddingTop) * inputWidth +
133  xInput-paddingLeft;
134  }
135 
136  inputValue = inputVec[inputIndex];
137  }
138 
139  sum += filterVec[filterIndex] * inputValue;
140  }
141  }
142  }
143  }
144 
145  if (biasEnabled)
146  {
147  sum += biasVec[cOutput];
148  }
149 
150  unsigned int outIdx;
151  if (dataLayoutIndexed.GetDataLayout() == DataLayout::NDHWC)
152  {
153  outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
154  zOutput * outputHeight * outputWidth * outChannels +
155  yOutput * outputWidth * outChannels +
156  xOutput * outChannels +
157  cOutput;
158  }
159  else
160  {
161  // NCDHW DataLayout
162  outIdx = batchIdx * outputDepth * outputHeight * outputWidth * outChannels +
163  cOutput * outputDepth * outputHeight * outputWidth +
164  zOutput * outputHeight * outputWidth +
165  yOutput * outputWidth +
166  xOutput;
167  }
168 
169  rOutputEncoder[outIdx];
170  rOutputEncoder.Set(sum);
171  }
172  }
173  }
174  }
175  }
176 }
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...

◆ CopyArmComputeClTensorData()

void armnn::CopyArmComputeClTensorData ( arm_compute::CLTensor &  dstTensor,
const T *  srcData 
)

Definition at line 55 of file ClWorkloadUtils.hpp.

References ARMNN_SCOPED_PROFILING_EVENT_CL.

Referenced by ClConstantWorkload::Execute().

56 {
57  {
58  ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
59  dstTensor.map(true);
60  }
61 
62  {
63  ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
64  armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor);
65  }
66 
67  dstTensor.unmap();
68 }
#define ARMNN_SCOPED_PROFILING_EVENT_CL(name)

◆ CopyArmComputeTensorData()

void armnn::CopyArmComputeTensorData ( arm_compute::Tensor &  dstTensor,
const T *  srcData 
)

Definition at line 54 of file NeonWorkloadUtils.hpp.

Referenced by InitializeArmComputeTensorData().

55 {
56  InitialiseArmComputeTensorEmpty(dstTensor);
57  CopyArmComputeITensorData(srcData, dstTensor);
58 }

◆ CopyTensorContentsGeneric()

void armnn::CopyTensorContentsGeneric ( const ITensorHandle srcTensor,
ITensorHandle dstTensor,
CopyFunc  copy 
)

Definition at line 46 of file WorkloadUtils.hpp.

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, TensorShape::GetNumDimensions(), ITensorHandle::GetShape(), ITensorHandle::GetStrides(), IgnoreUnused(), ITensorHandle::Map(), MaxNumOfTensorDimensions, Undefined, and ITensorHandle::Unmap().

Referenced by CopyToOutputTensor(), NeonConvertBf16ToFp32Workload::Execute(), NeonConvertFp32ToBf16Workload::Execute(), NeonConvertFp16ToFp32Workload::Execute(), NeonConvertFp32ToFp16Workload::Execute(), CopyMemGenericWorkload::Execute(), CopyMemGenericWorkload::ExecuteAsync(), and LoadedNetwork::FreeWorkingMemory().

47 {
48  // For ease of understanding, names are assigned to the dimensions
49  // of the tensor as if NHWC, however this routine works with any 5D tensor
50  static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");
51 
52  TensorShape srcStrides = srcTensor->GetStrides();
53  const TensorShape& srcShape = srcTensor->GetShape();
54  const auto srcSize = srcTensor->GetStrides()[0] * srcShape[0];
55  IgnoreUnused(srcSize); // Only used for asserts
56  TensorShape dstStrides = dstTensor->GetStrides();
57  const TensorShape& dstShape = dstTensor->GetShape();
58  const auto dstSize = dstTensor->GetStrides()[0] * dstShape[0];
59  IgnoreUnused(dstSize); // Only used for asserts
60 
61  size_t srcDepth = 1;
62  size_t srcBatches = 1;
63  size_t srcHeight = 1;
64  size_t srcWidth = 1;
65  size_t srcChannels = 1;
66  AssignValues(srcShape.GetNumDimensions(),
67  0,
68  srcShape,
69  srcChannels,
70  srcWidth,
71  srcHeight,
72  srcBatches,
73  srcDepth);
74 
75  size_t srcDepthStride = 0;
76  size_t srcBatchStride = 0;
77  size_t srcHeightStride = 0;
78  size_t srcWidthStride = 0;
79  size_t srcChannelStride = 0;
80  AssignValues(srcStrides.GetNumDimensions(),
81  0,
82  srcStrides,
83  srcChannelStride,
84  srcWidthStride,
85  srcHeightStride,
86  srcBatchStride,
87  srcDepthStride);
88 
89  size_t dstDepth = 1;
90  size_t dstBatches = 1;
91  size_t dstHeight = 1;
92  size_t dstWidth = 1;
93  size_t dstChannels = 1;
94  AssignValues(dstShape.GetNumDimensions(),
95  0,
96  dstShape,
97  dstChannels,
98  dstWidth,
99  dstHeight,
100  dstBatches,
101  dstDepth);
102 
103  size_t dstDepthStride = 0;
104  size_t dstBatchStride = 0;
105  size_t dstHeightStride = 0;
106  size_t dstWidthStride = 0;
107  size_t dstChannelStride = 0;
108  AssignValues(dstStrides.GetNumDimensions(),
109  0,
110  dstStrides,
111  dstChannelStride,
112  dstWidthStride,
113  dstHeightStride,
114  dstBatchStride,
115  dstDepthStride);
116 
117  const unsigned char* srcDataStart;
118  unsigned char* dstDataStart;
119  {
120  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
121  srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
122  dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
123  }
124 
125  size_t copyLength = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
126  size_t copyWidth = std::min(srcWidth, dstWidth);
127  size_t copyHeight = std::min(srcHeight, dstHeight);
128  size_t copyBatches = std::min(srcBatches, dstBatches);
129  size_t copyDepth = std::min(srcDepth, dstDepth);
130 
131  // Coalesce inner dimensions where possible
132  // to reduce the overhead of calling copy() and to
133  // allow for memory bandwidth optimisations
134  if (copyLength == srcWidthStride &&
135  copyLength == dstWidthStride)
136  {
137  // There is no special padding between rows,
138  // and sizes are compatible, so copy whole rows
139  copyLength *= copyWidth;
140  copyWidth = 1;
141 
142  if (copyLength == srcHeightStride &&
143  copyLength == dstHeightStride)
144  {
145  // There is no special padding between batches
146  // and sizes are compatible so copy whole batches
147  copyLength *= copyHeight;
148  copyHeight = 1;
149  }
150  }
151 
152  const unsigned char* srcData = srcDataStart;
153  unsigned char* dstData = dstDataStart;
154  for (unsigned int d = 0; d < copyDepth; ++d)
155  {
156  auto srcPtrDepth = srcData;
157  auto dstPtrDepth = dstData;
158  for (unsigned int b = 0; b < copyBatches; ++b)
159  {
160  auto srcPtrBatch = srcData;
161  auto dstPtrBatch = dstData;
162  for (unsigned int h = 0; h < copyHeight; ++h)
163  {
164  auto srcPtrChannel = srcData;
165  auto dstPtrChannel = dstData;
166  for (unsigned int w = 0; w < copyWidth; ++w)
167  {
168  ARMNN_ASSERT(srcData >= srcDataStart && srcData + copyLength <= srcDataStart + srcSize);
169  ARMNN_ASSERT(dstData >= dstDataStart && dstData + copyLength <= dstDataStart + dstSize);
170  copy(dstData, srcData, copyLength);
171  dstData += dstWidthStride;
172  srcData += srcWidthStride;
173  }
174  dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
175  srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
176  }
177  dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
178  srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
179  }
180  dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
181  srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
182  }
183 
184  srcTensor->Unmap();
185  dstTensor->Unmap();
186 }
void IgnoreUnused(Ts &&...)
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
constexpr unsigned int MaxNumOfTensorDimensions
Definition: Types.hpp:31
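
A minimal usage sketch, mirroring how CopyToOutputTensor below uses this function (srcHandle and dstHandle are assumed to be valid ITensorHandle pointers with compatible shapes):

 auto copyFunc = [](void* dst, const void* src, size_t size)
 {
     memcpy(dst, src, size);
 };
 armnn::CopyTensorContentsGeneric(srcHandle, dstHandle, copyFunc);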

◆ CopyToOutputTensor()

void armnn::CopyToOutputTensor ( const Tensor outputTensor,
ITensorHandle outputTensorHandle 
)

Definition at line 1294 of file LoadedNetwork.cpp.

References CopyTensorContentsGeneric(), BaseTensor< MemoryType >::GetInfo(), and BaseTensor< MemoryType >::GetMemoryArea().

Referenced by LoadedNetwork::Execute().

1295 {
1296  auto copyFunc = [](void* dst, const void* src, size_t size)
1297  {
1298  memcpy(dst, src, size);
1299  };
1300 
1301  std::unique_ptr<ITensorHandle> tensorHandle =
1302  std::make_unique<PassthroughTensorHandle>(outputTensor.GetInfo(),
1303  outputTensor.GetMemoryArea());
1304 
1305  CopyTensorContentsGeneric(outputTensorHandle, tensorHandle.get(), copyFunc);
1306 }
void CopyTensorContentsGeneric(const ITensorHandle *srcTensor, ITensorHandle *dstTensor, CopyFunc copy)

◆ CreateAclNormalizationLayerInfoForL2Normalization()

arm_compute::NormalizationLayerInfo armnn::CreateAclNormalizationLayerInfoForL2Normalization ( const armnn::TensorInfo tensorInfo,
armnn::DataLayout  dataLayout 
)
inline

Definition at line 28 of file ArmComputeUtils.hpp.

References TensorInfo::GetShape(), and NCHW.

30 {
31  unsigned int depthDimension = dataLayout == armnn::DataLayout::NCHW ? 1 : 3;
32  const unsigned int depth = tensorInfo.GetShape()[depthDimension];
33 
34  // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. This version of
35  // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose
36  // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully choosing the normalization
37  // parameters.
38  //
39  // Please refer to both the reference implementation of the normalization layer and the implementation of
40  // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below.
41 
42  // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd.
43  // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in
44  // ACL's normalization_layer_cross_map() CL function.
45  const uint32_t normSize = depth * 2u + 1u;
46 
47  // See ACL's NormalizationLayerInfo::scale_coeff() definition.
48  // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead.
49  const float alpha = 1.0f;
50 
51  // Don't offset the reduction.
52  const float kappa = 0.0f;
53 
54  // pow(reduction, -0.5) = 1 / sqrt(reduction)
55  const float beta = 0.5f;
56 
57  return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false);
58 }
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
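
A minimal sketch (not from the ArmNN sources): for an NCHW tensor with 24 channels the normalization size becomes 2 * 24 + 1 = 49, an odd window covering the full depth range as ACL requires:

 armnn::TensorInfo tensorInfo(armnn::TensorShape({ 1, 24, 32, 32 }), armnn::DataType::Float32);
 arm_compute::NormalizationLayerInfo normInfo =
     armnn::CreateAclNormalizationLayerInfoForL2Normalization(tensorInfo, armnn::DataLayout::NCHW);
 // normInfo describes a CROSS_MAP normalization with norm_size = 49,
 // alpha = 1.0, beta = 0.5 and kappa = 0.0.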

◆ CreateClContext()

flatbuffers::Offset<ClContext> armnn::CreateClContext ( flatbuffers::FlatBufferBuilder &  _fbb,
flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>>  programs = 0 
)
inline

Definition at line 57 of file ClContextSchema_generated.h.

References ClContextBuilder::add_programs(), and ClContextBuilder::Finish().

Referenced by CreateClContextDirect(), and ClContextSerializer::Serialize().

59  {
60  ClContextBuilder builder_(_fbb);
61  builder_.add_programs(programs);
62  return builder_.Finish();
63 }

◆ CreateClContextDirect()

flatbuffers::Offset<ClContext> armnn::CreateClContextDirect ( flatbuffers::FlatBufferBuilder &  _fbb,
const std::vector< flatbuffers::Offset< armnn::Program >> *  programs = nullptr 
)
inline

Definition at line 65 of file ClContextSchema_generated.h.

References CreateClContext().

67  {
68  auto programs__ = programs ? _fbb.CreateVector<flatbuffers::Offset<armnn::Program>>(*programs) : 0;
69  return armnn::CreateClContext(
70  _fbb,
71  programs__);
72 }
flatbuffers::Offset< ClContext > CreateClContext(flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::Vector< flatbuffers::Offset< armnn::Program >>> programs=0)

◆ CreateDescriptorForConcatenation()

OriginsDescriptor armnn::CreateDescriptorForConcatenation ( TensorShapeIt  first,
TensorShapeIt  last,
unsigned int  concatenationDimension 
)

Convenience template to create an OriginsDescriptor to use when creating a ConcatLayer for performing concatenation of a number of input tensors.

Definition at line 261 of file Descriptors.hpp.

References OriginsDescriptor::SetConcatAxis(), and OriginsDescriptor::SetViewOriginCoord().

Referenced by ConcatDifferentInputOutputQParamTest(), CreateDescriptorForConcat(), and TEST_SUITE().

264 {
265  auto numInputs = std::distance(first, last);
266 
267  if (numInputs < 2)
268  {
269  throw InvalidArgumentException("Concatenation requires at least 2 inputs");
270  }
271 
272  const auto& firstInputShape = *first;
273 
274  const unsigned int numDimensions = firstInputShape.GetNumDimensions();
275  for (auto it = first + 1; it != last; ++it)
276  {
277  if (it->GetNumDimensions() != numDimensions)
278  {
279  throw InvalidArgumentException("All inputs to concatenation must have the same number of dimensions");
280  }
281  }
282 
283  if (concatenationDimension >= numDimensions)
284  {
285  throw InvalidArgumentException("concatenationDimension must be between 0 and the number of dimensions.");
286  }
287 
288  for (auto it = first; it != last; ++it)
289  {
290  for (unsigned int d = 0; d < numDimensions; ++d)
291  {
292  const bool dimSizeOk = (d == concatenationDimension) || (firstInputShape[d] == (*it)[d]);
293  if (!dimSizeOk)
294  {
295  throw InvalidArgumentException("All inputs to concatenation must be the same size along all dimensions "
296  " except the concatenation dimension");
297  }
298  }
299  }
300 
301  OriginsDescriptor viewsDescriptor(static_cast<uint32_t>(numInputs), numDimensions);
302  viewsDescriptor.SetConcatAxis(concatenationDimension);
303 
304  uint32_t viewIndex = 0u;
305  uint32_t coordAlongConcatDim = 0u;
306  for (auto it = first; it != last; ++it)
307  {
308  const auto& inputShape = *it;
309 
310  for (unsigned int i = 0; i < concatenationDimension; ++i)
311  {
312  viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
313  }
314 
315  viewsDescriptor.SetViewOriginCoord(viewIndex, concatenationDimension, coordAlongConcatDim);
316  unsigned int dimSize = inputShape[concatenationDimension];
317  coordAlongConcatDim += dimSize;
318 
319 
320  for (unsigned int i = concatenationDimension + 1; i < numDimensions; ++i)
321  {
322  viewsDescriptor.SetViewOriginCoord(viewIndex, i, 0);
323  }
324 
325  ++viewIndex;
326  }
327 
328  return viewsDescriptor;
329 }
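
A minimal usage sketch (not from the ArmNN sources): concatenating two NCHW tensors along the channel dimension (axis 1):

 std::vector<armnn::TensorShape> inputShapes = { armnn::TensorShape({ 1, 8, 16, 16 }),
                                                 armnn::TensorShape({ 1, 4, 16, 16 }) };
 armnn::OriginsDescriptor concatDesc =
     armnn::CreateDescriptorForConcatenation(inputShapes.begin(), inputShapes.end(), 1);
 // Two views are created; their origins along axis 1 are 0 and 8, so the second
 // input is placed directly after the first in the 12-channel output.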

◆ CreateProgram()

flatbuffers::Offset<Program> armnn::CreateProgram ( flatbuffers::FlatBufferBuilder &  _fbb,
flatbuffers::Offset< flatbuffers::String >  name = 0,
flatbuffers::Offset< flatbuffers::Vector< uint8_t >>  binary = 0 
)
inline

Definition at line 118 of file ClContextSchema_generated.h.

References ProgramBuilder::add_binary(), ProgramBuilder::add_name(), and ProgramBuilder::Finish().

Referenced by CreateProgramDirect(), and ClContextSerializer::Serialize().

121  {
122  ProgramBuilder builder_(_fbb);
123  builder_.add_binary(binary);
124  builder_.add_name(name);
125  return builder_.Finish();
126 }

◆ CreateProgramDirect()

flatbuffers::Offset<Program> armnn::CreateProgramDirect ( flatbuffers::FlatBufferBuilder &  _fbb,
const char *  name = nullptr,
const std::vector< uint8_t > *  binary = nullptr 
)
inline

Definition at line 128 of file ClContextSchema_generated.h.

References CreateProgram().

131  {
132  auto name__ = name ? _fbb.CreateString(name) : 0;
133  auto binary__ = binary ? _fbb.CreateVector<uint8_t>(*binary) : 0;
134  return armnn::CreateProgram(
135  _fbb,
136  name__,
137  binary__);
138 }
flatbuffers::Offset< Program > CreateProgram(flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset< flatbuffers::String > name=0, flatbuffers::Offset< flatbuffers::Vector< uint8_t >> binary=0)

◆ CreateSupportedBackends()

BackendsMap CreateSupportedBackends ( TensorHandleFactoryRegistry handleFactoryRegistry,
BackendSettings backendSettings 
)

Definition at line 1120 of file Network.cpp.

References ARMNN_ASSERT, BackendRegistryInstance(), and BackendSettings::m_SupportedBackends.

Referenced by Optimize().

1122 {
1123  BackendsMap backends;
1124  auto const& backendRegistry = BackendRegistryInstance();
1125  for (auto&& selectedBackend : backendSettings.m_SupportedBackends)
1126  {
1127  auto backendFactory = backendRegistry.GetFactory(selectedBackend);
1128  auto backendObjPtr = backendFactory();
1129  ARMNN_ASSERT(backendObjPtr);
1130 
1131  backendObjPtr->RegisterTensorHandleFactories(handleFactoryRegistry);
1132 
1133  backends[backendObjPtr->GetId()] = std::move(backendObjPtr);
1134  }
1135 
1136  return backends;
1137 }
BackendRegistry & BackendRegistryInstance()
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::map< BackendId, std::unique_ptr< class IBackendInternal > > BackendsMap
Definition: Network.hpp:294

◆ Debug()

void Debug ( const TensorInfo inputInfo,
const T *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Definition at line 19 of file Debug.cpp.

References Debug< BFloat16 >(), Debug< float >(), Debug< Half >(), Debug< int16_t >(), Debug< int32_t >(), Debug< int8_t >(), Debug< uint8_t >(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), and TensorInfo::GetShape().

Referenced by RefDebugWorkload< DataType >::ExecuteAsync().

24 {
25  const unsigned int numDims = inputInfo.GetNumDimensions();
26  const unsigned int numElements = inputInfo.GetNumElements();
27  const TensorShape& inputShape = inputInfo.GetShape();
28 
29  std::vector<unsigned int> strides(numDims, 0);
30  strides[numDims - 1] = inputShape[numDims - 1];
31 
32  for (unsigned int i = 2; i <= numDims; i++)
33  {
34  strides[numDims - i] = strides[numDims - i + 1] * inputShape[numDims - i];
35  }
36 
37  std::cout << "{ ";
38  std::cout << "\"layerGuid\": " << guid << ", ";
39  std::cout << "\"layerName\": \"" << layerName << "\", ";
40  std::cout << "\"outputSlot\": " << slotIndex << ", ";
41  std::cout << "\"shape\": ";
42 
43  std::cout << "[";
44  for (unsigned int i = 0; i < numDims; i++)
45  {
46  std::cout << inputShape[i];
47  if (i != numDims - 1)
48  {
49  std::cout << ", ";
50  }
51  }
52  std::cout << "], ";
53 
54  std::cout << "\"min\": "
55  << static_cast<float>(*std::min_element(inputData, inputData + numElements)) << ", ";
56 
57  std::cout << "\"max\": "
58  << static_cast<float>(*std::max_element(inputData, inputData + numElements)) << ", ";
59 
60  std::cout << "\"data\": ";
61 
62  for (unsigned int i = 0; i < numElements; i++)
63  {
64  for (unsigned int j = 0; j < numDims; j++)
65  {
66  if (i % strides[j] == 0)
67  {
68  std::cout << "[" ;
69  }
70  }
71 
72  std::cout << static_cast<float>(inputData[i]);
73 
74  for (unsigned int j = 0; j < numDims; j++)
75  {
76  if ((i+1) % strides[j] == 0)
77  {
78  std::cout << "]" ;
79  }
80  }
81 
82  if (i != numElements - 1)
83  {
84  std::cout << ", ";
85  }
86  }
87 
88  std::cout << " }" << std::endl;
89 }

◆ Debug< BFloat16 >()

template void armnn::Debug< BFloat16 > ( const TensorInfo inputInfo,
const BFloat16 inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< float >()

template void armnn::Debug< float > ( const TensorInfo inputInfo,
const float *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< Half >()

template void armnn::Debug< Half > ( const TensorInfo inputInfo,
const Half inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< int16_t >()

template void armnn::Debug< int16_t > ( const TensorInfo inputInfo,
const int16_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< int32_t >()

template void armnn::Debug< int32_t > ( const TensorInfo inputInfo,
const int32_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< int8_t >()

template void armnn::Debug< int8_t > ( const TensorInfo inputInfo,
const int8_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ Debug< uint8_t >()

template void armnn::Debug< uint8_t > ( const TensorInfo inputInfo,
const uint8_t *  inputData,
LayerGuid  guid,
const std::string &  layerName,
unsigned int  slotIndex 
)

Referenced by Debug().

◆ DepthToSpace()

void DepthToSpace ( const TensorInfo inputInfo,
const DepthToSpaceDescriptor descriptor,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 18 of file DepthToSpace.cpp.

References ARMNN_ASSERT, DepthToSpace(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), TensorShape::GetNumElements(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, NCHW, and armnnUtils::Permute().

Referenced by DepthToSpace(), and TEST_SUITE().

23 {
24  const unsigned int blockSize = descriptor.m_BlockSize;
25  ARMNN_ASSERT(blockSize != 0u);
26 
27  const TensorShape& inputShape = inputInfo.GetShape();
28  const unsigned int batches = inputShape[0];
29 
30  armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
31  const unsigned int inDepth = inputShape[dataLayoutIndexed.GetChannelsIndex()];
32  const unsigned int inHeight = inputShape[dataLayoutIndexed.GetHeightIndex()];
33  const unsigned int inWidth = inputShape[dataLayoutIndexed.GetWidthIndex()];
34 
35  const unsigned int outDepth = inDepth / (blockSize * blockSize);
36 
37  // The 4D input data can be interpreted as 6D (implicitly reshaped) as follows:
38  //
39  // [batch, block size, block size, inDepth, inHeight, inWidth] for NCHW and
40  // [batch, inHeight, inWidth, blockSize, blockSize, outDepth] for NHWC.
41  //
42  // DepthToSpace can then be implemented as a permutation in 6D resulting in
43  // the following shapes:
44  //
45  // [batch, outDepth, inHeight, blockSize, inWidth, blockSize] for NCHW and
46  // [batch, inHeight, blockSize, inWidth, blockSize, outDepth] for NHWC.
47  //
48  // NOTE:
49  // Since 6D tensors are not currently supported, in practice we need to handle each
50  // batch separately and execute 5D permutations
51 
52  TensorShape permDestShape;
53  PermutationVector permVector{};
54  if (descriptor.m_DataLayout == DataLayout::NCHW)
55  {
56  permDestShape = TensorShape({ outDepth, inHeight, blockSize, inWidth, blockSize });
57  permVector = { 2, 4, 0, 1, 3 };
58  }
59  else
60  {
61  permDestShape = TensorShape({ inHeight, blockSize, inWidth, blockSize, outDepth });
62  permVector = { 0, 2, 1, 3, 4 };
63  }
64 
65  const unsigned int numElementsPerBatch = inputShape.GetNumElements() / batches;
66 
67  for (unsigned int batchIndex = 0u; batchIndex < batches; ++batchIndex)
68  {
69  const uintptr_t batchDataOffset = batchIndex * (numElementsPerBatch * dataTypeSize);
70 
71  armnnUtils::Permute(permDestShape,
72  permVector,
73  static_cast<const void*>(reinterpret_cast<const uint8_t*>(inputData) + batchDataOffset),
74  static_cast<void*>(reinterpret_cast<uint8_t*>(outputData) + batchDataOffset),
75  dataTypeSize);
76  }
77 }
unsigned int GetNumElements() const
Function that calculates the number of tensor elements by multiplying all dimension sizes which are specified...
Definition: Tensor.cpp:181
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
void Permute(const armnn::TensorShape &dstShape, const armnn::PermutationVector &mappings, const void *src, void *dst, size_t dataTypeSize)
Definition: Permute.cpp:131
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int m_BlockSize
Scalar specifying the input block size. It must be >= 1.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
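
A usage sketch (not part of the generated reference): DepthToSpace is a reference-backend workload function, so the include paths, shapes and values below are illustrative assumptions only.

    // Rearrange a 1x1x1x4 NHWC tensor (blockSize 2) into a 1x2x2x1 tensor.
    armnn::TensorInfo inputInfo({ 1, 1, 1, 4 }, armnn::DataType::Float32);

    armnn::DepthToSpaceDescriptor descriptor;
    descriptor.m_BlockSize  = 2;
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;

    std::vector<float> input  = { 1.0f, 2.0f, 3.0f, 4.0f };
    std::vector<float> output(4, 0.0f);

    // dataTypeSize is the size in bytes of one element (float here).
    armnn::DepthToSpace(inputInfo, descriptor, input.data(), output.data(), sizeof(float));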

◆ Dequantize() [1/4]

void Dequantize ( Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo 
)

Definition at line 13 of file Dequantize.cpp.

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumElements(), IgnoreUnused(), and Encoder< IType >::Set().

17 {
18  IgnoreUnused(outputInfo);
19  ARMNN_ASSERT(inputInfo.GetNumElements() == outputInfo.GetNumElements());
20  for (unsigned int i = 0; i < inputInfo.GetNumElements(); i++)
21  {
22  // inputDecoder.Get() dequantizes the data element from whatever
23  // type is given by inputInfo to fp32 (If MakeDecoder supports that dequantization)
24  // outputEncoder.Set() transforms the data element to whatever type is
25  // given by outputInfo (if MakeEncoder supports that transformation)
26  outputEncoder.Set(inputDecoder.Get());
27  ++outputEncoder;
28  ++inputDecoder;
29  }
30 }
virtual void Set(IType right)=0
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ Dequantize() [2/4]

std::vector<float> armnn::Dequantize ( const T *  quant,
const TensorInfo &  info 
)

u8 helpers

Definition at line 95 of file RefWorkloadUtils.hpp.

References Dequantize(), TensorInfo::GetNumElements(), TensorInfo::GetQuantizationOffset(), and TensorInfo::GetQuantizationScale().

96 {
97  std::vector<float> ret(info.GetNumElements());
98  for (size_t i = 0; i < info.GetNumElements(); i++)
99  {
100  ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
101  }
102  return ret;
103 }
float Dequantize(QuantizedType value, float scale, int32_t offset)
Dequantize an 8-bit data type into a floating point data type.
Definition: TypesUtils.cpp:46

◆ Dequantize() [3/4]

void armnn::Dequantize ( const T *  inputData,
float *  outputData,
const TensorInfo &  info 
)
inline

Definition at line 106 of file RefWorkloadUtils.hpp.

References TensorInfo::GetNumElements(), TensorInfo::GetQuantizationOffset(), and TensorInfo::GetQuantizationScale().

107 {
108  for (unsigned int i = 0; i < info.GetNumElements(); i++)
109  {
110  outputData[i] = Dequantize<T>(inputData[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
111  }
112 }

◆ Dequantize() [4/4]

float Dequantize ( QuantizedType  value,
float  scale,
int32_t  offset 
)

Dequantize an 8-bit data type into a floating point data type.

Parameters
value- The value to dequantize.
scale- The scale (must be non-zero).
offset- The offset.
Returns
- The dequantized value calculated as (value-offset)*scale.

Definition at line 46 of file TypesUtils.cpp.

References ARMNN_ASSERT.

Referenced by SelectiveQuantizer< T, DoQuantize >::Dequantize(), Dequantize(), TensorPrinter::operator()(), and TEST_SUITE().

47 {
48  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
49  ARMNN_ASSERT(scale != 0.f);
50  ARMNN_ASSERT(!IsNan(value));
51  return (armnn::numeric_cast<float>(value - offset)) * scale;
52 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
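
A minimal worked example of the formula above (the scale and offset values are assumed for illustration):

    // (value - offset) * scale: with scale 0.5 and offset 10,
    // the quantized value 14 dequantizes to (14 - 10) * 0.5 = 2.0f.
    uint8_t quantized = 14;
    float real = armnn::Dequantize(quantized, 0.5f, 10);   // 2.0f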

◆ DetectionPostProcess()

void DetectionPostProcess ( const TensorInfo &  boxEncodingsInfo,
const TensorInfo &  scoresInfo,
const TensorInfo &  anchorsInfo,
const TensorInfo &  detectionBoxesInfo,
const TensorInfo &  detectionClassesInfo,
const TensorInfo &  detectionScoresInfo,
const TensorInfo &  numDetectionsInfo,
const DetectionPostProcessDescriptor &  desc,
Decoder< float > &  boxEncodings,
Decoder< float > &  scores,
Decoder< float > &  anchors,
float *  detectionBoxes,
float *  detectionClasses,
float *  detectionScores,
float *  numDetections 
)

Definition at line 140 of file DetectionPostProcess.cpp.

References AllocateOutputData(), ARMNN_ASSERT, GenerateRangeK(), Decoder< IType >::Get(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IgnoreUnused(), DetectionPostProcessDescriptor::m_DetectionsPerClass, DetectionPostProcessDescriptor::m_MaxClassesPerDetection, DetectionPostProcessDescriptor::m_MaxDetections, DetectionPostProcessDescriptor::m_NmsIouThreshold, DetectionPostProcessDescriptor::m_NmsScoreThreshold, DetectionPostProcessDescriptor::m_NumClasses, DetectionPostProcessDescriptor::m_ScaleH, DetectionPostProcessDescriptor::m_ScaleW, DetectionPostProcessDescriptor::m_ScaleX, DetectionPostProcessDescriptor::m_ScaleY, DetectionPostProcessDescriptor::m_UseRegularNms, NonMaxSuppression(), numeric_cast(), and TopKSort().

Referenced by TEST_SUITE().

155 {
156  IgnoreUnused(anchorsInfo, detectionClassesInfo, detectionScoresInfo, numDetectionsInfo);
157 
158  // Transform center-size format which is (ycenter, xcenter, height, width) to box-corner format,
159  // which represents the lower left corner and the upper right corner (ymin, xmin, ymax, xmax)
160  std::vector<float> boxCorners(boxEncodingsInfo.GetNumElements());
161 
162  const unsigned int numBoxes = boxEncodingsInfo.GetShape()[1];
163  const unsigned int numScores = scoresInfo.GetNumElements();
164 
165  for (unsigned int i = 0; i < numBoxes; ++i)
166  {
167  // Y
168  float boxEncodingY = boxEncodings.Get();
169  float anchorY = anchors.Get();
170 
171  ++boxEncodings;
172  ++anchors;
173 
174  // X
175  float boxEncodingX = boxEncodings.Get();
176  float anchorX = anchors.Get();
177 
178  ++boxEncodings;
179  ++anchors;
180 
181  // H
182  float boxEncodingH = boxEncodings.Get();
183  float anchorH = anchors.Get();
184 
185  ++boxEncodings;
186  ++anchors;
187 
188  // W
189  float boxEncodingW = boxEncodings.Get();
190  float anchorW = anchors.Get();
191 
192  ++boxEncodings;
193  ++anchors;
194 
195  float yCentre = boxEncodingY / desc.m_ScaleY * anchorH + anchorY;
196  float xCentre = boxEncodingX / desc.m_ScaleX * anchorW + anchorX;
197 
198  float halfH = 0.5f * expf(boxEncodingH / desc.m_ScaleH) * anchorH;
199  float halfW = 0.5f * expf(boxEncodingW / desc.m_ScaleW) * anchorW;
200 
201  unsigned int indexY = i * 4;
202  unsigned int indexX = indexY + 1;
203  unsigned int indexH = indexX + 1;
204  unsigned int indexW = indexH + 1;
205 
206  // ymin
207  boxCorners[indexY] = yCentre - halfH;
208  // xmin
209  boxCorners[indexX] = xCentre - halfW;
210  // ymax
211  boxCorners[indexH] = yCentre + halfH;
212  // xmax
213  boxCorners[indexW] = xCentre + halfW;
214 
215  ARMNN_ASSERT(boxCorners[indexY] < boxCorners[indexH]);
216  ARMNN_ASSERT(boxCorners[indexX] < boxCorners[indexW]);
217  }
218 
219  unsigned int numClassesWithBg = desc.m_NumClasses + 1;
220 
221  // Decode scores
222  std::vector<float> decodedScores;
223  decodedScores.reserve(numScores);
224 
225  for (unsigned int i = 0u; i < numScores; ++i)
226  {
227  decodedScores.emplace_back(scores.Get());
228  ++scores;
229  }
230 
231  // Perform Non Max Suppression.
232  if (desc.m_UseRegularNms)
233  {
234  // Perform Regular NMS.
235  // For each class, perform NMS and select max detection numbers of the highest score across all classes.
236  std::vector<float> classScores(numBoxes);
237 
238  std::vector<unsigned int> selectedBoxesAfterNms;
239  selectedBoxesAfterNms.reserve(numBoxes);
240 
241  std::vector<float> selectedScoresAfterNms;
242  selectedBoxesAfterNms.reserve(numScores);
243 
244  std::vector<unsigned int> selectedClasses;
245 
246  for (unsigned int c = 0; c < desc.m_NumClasses; ++c)
247  {
248  // For each box, get its score for class c.
249  for (unsigned int i = 0; i < numBoxes; ++i)
250  {
251  classScores[i] = decodedScores[i * numClassesWithBg + c + 1];
252  }
253  std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes,
254  boxCorners,
255  classScores,
256  desc.m_NmsScoreThreshold,
257  desc.m_DetectionsPerClass,
258  desc.m_NmsIouThreshold);
259 
260  for (unsigned int i = 0; i < selectedIndices.size(); ++i)
261  {
262  selectedBoxesAfterNms.push_back(selectedIndices[i]);
263  selectedScoresAfterNms.push_back(classScores[selectedIndices[i]]);
264  selectedClasses.push_back(c);
265  }
266  }
267 
268  // Select max detection numbers of the highest score across all classes
269  unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedBoxesAfterNms.size());
270  unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
271 
272  // Sort the max scores among the selected indices.
273  std::vector<unsigned int> outputIndices = GenerateRangeK(numSelected);
274  TopKSort(numOutput, outputIndices.data(), selectedScoresAfterNms.data(), numSelected);
275 
276  AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, outputIndices,
277  selectedBoxesAfterNms, selectedClasses, selectedScoresAfterNms,
278  detectionBoxes, detectionScores, detectionClasses, numDetections);
279  }
280  else
281  {
282  // Perform Fast NMS.
283  // Select max scores of boxes and perform NMS on max scores,
284  // select max detection numbers of the highest score
285  unsigned int numClassesPerBox = std::min(desc.m_MaxClassesPerDetection, desc.m_NumClasses);
286  std::vector<float> maxScores;
287  std::vector<unsigned int>boxIndices;
288  std::vector<unsigned int>maxScoreClasses;
289 
290  for (unsigned int box = 0; box < numBoxes; ++box)
291  {
292  unsigned int scoreIndex = box * numClassesWithBg + 1;
293 
294  // Get the max scores of the box.
295  std::vector<unsigned int> maxScoreIndices = GenerateRangeK(desc.m_NumClasses);
296  TopKSort(numClassesPerBox, maxScoreIndices.data(),
297  decodedScores.data() + scoreIndex, desc.m_NumClasses);
298 
299  for (unsigned int i = 0; i < numClassesPerBox; ++i)
300  {
301  maxScores.push_back(decodedScores[scoreIndex + maxScoreIndices[i]]);
302  maxScoreClasses.push_back(maxScoreIndices[i]);
303  boxIndices.push_back(box);
304  }
305  }
306 
307  // Perform NMS on max scores
308  std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, maxScores,
309  desc.m_NmsScoreThreshold,
310  desc.m_MaxDetections,
311  desc.m_NmsIouThreshold);
312 
313  unsigned int numSelected = armnn::numeric_cast<unsigned int>(selectedIndices.size());
314  unsigned int numOutput = std::min(desc.m_MaxDetections, numSelected);
315 
316  AllocateOutputData(detectionBoxesInfo.GetShape()[1], numOutput, boxCorners, selectedIndices,
317  boxIndices, maxScoreClasses, maxScores,
318  detectionBoxes, detectionScores, detectionClasses, numDetections);
319  }
320 }
std::vector< unsigned int > GenerateRangeK(unsigned int k)
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
void TopKSort(unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void AllocateOutputData(unsigned int numOutput, unsigned int numSelected, const std::vector< float > &boxCorners, const std::vector< unsigned int > &outputIndices, const std::vector< unsigned int > &selectedBoxes, const std::vector< unsigned int > &selectedClasses, const std::vector< float > &selectedScores, float *detectionBoxes, float *detectionScores, float *detectionClasses, float *numDetections)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
std::vector< unsigned int > NonMaxSuppression(unsigned int numBoxes, const std::vector< float > &boxCorners, const std::vector< float > &scores, float nmsScoreThreshold, unsigned int maxDetection, float nmsIouThreshold)
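
As a compact summary of the box decoding performed above (symbols follow the listing; this is a restatement for readability, not additional behaviour):

    // Per box, with the matching anchor (ycenter, xcenter, height, width):
    //   yCentre = boxEncodingY / m_ScaleY * anchorH + anchorY
    //   xCentre = boxEncodingX / m_ScaleX * anchorW + anchorX
    //   halfH   = 0.5 * exp(boxEncodingH / m_ScaleH) * anchorH
    //   halfW   = 0.5 * exp(boxEncodingW / m_ScaleW) * anchorW
    // corners: (ymin, xmin, ymax, xmax) =
    //   (yCentre - halfH, xCentre - halfW, yCentre + halfH, xCentre + halfW)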

◆ ExtractJsonObjects()

void armnn::ExtractJsonObjects ( unsigned int  inferenceIndex,
const Event *  parentEvent,
JsonChildObject &  parentObject,
std::map< const Event *, std::vector< const Event *>>  descendantsMap 
)

Definition at line 303 of file Profiling.cpp.

References JsonChildObject::AddChild(), JsonChildObject::AddMeasurement(), ARMNN_ASSERT, Event, JsonChildObject::GetChild(), Event::GetMeasurements(), Event::GetProfilingGuid(), OptionalBase::has_value(), Measurement, JsonChildObject::NumChildren(), JsonChildObject::SetGuid(), JsonChildObject::SetType(), JsonChildObject::SetUnit(), and OptionalReferenceSwitch< IsReference, T >::value().

Referenced by ProfilerImpl::Print().

307 {
308  ARMNN_ASSERT(parentEvent);
309 
310  // If profiling GUID is entered, process it
311  if (parentEvent->GetProfilingGuid().has_value())
312  {
313  arm::pipe::ProfilingGuid profilingGuid;
314  profilingGuid = parentEvent->GetProfilingGuid().value();
315  parentObject.SetGuid(profilingGuid);
316  }
317  std::vector<Measurement> instrumentMeasurements = parentEvent->GetMeasurements();
318  unsigned int childIdx = 0;
319  for (size_t measurementIndex = 0; measurementIndex < instrumentMeasurements.size(); ++measurementIndex, ++childIdx)
320  {
321  if (inferenceIndex == 0)
322  {
323  // Only add kernel measurement once, in case of multiple inferences
324  JsonChildObject measurementObject{ instrumentMeasurements[measurementIndex].m_Name };
325  measurementObject.SetUnit(instrumentMeasurements[measurementIndex].m_Unit);
326  measurementObject.SetType(JsonObjectType::Measurement);
327 
328  ARMNN_ASSERT(parentObject.NumChildren() == childIdx);
329  parentObject.AddChild(measurementObject);
330  }
331 
332  parentObject.GetChild(childIdx).AddMeasurement(instrumentMeasurements[measurementIndex].m_Value);
333  }
334 
335  auto childEventsIt = descendantsMap.find(parentEvent);
336  if (childEventsIt != descendantsMap.end())
337  {
338  for (auto childEvent : childEventsIt->second)
339  {
340  if (inferenceIndex == 0)
341  {
342  // Only add second level once, in case of multiple inferences
343  JsonChildObject childObject{ childEvent->GetName() };
344  childObject.SetType(JsonObjectType::Event);
345  parentObject.AddChild(childObject);
346  }
347 
348  // It's possible that childIdx can overrun the parent's child vector. Check before we try to process a
349  // non-existent child.
350  if (childIdx < parentObject.NumChildren())
351  {
352  // Recursively process children.
353  ExtractJsonObjects(inferenceIndex, childEvent, parentObject.GetChild(childIdx), descendantsMap);
354  childIdx++;
355  }
356  }
357  }
358 }
void ExtractJsonObjects(unsigned int inferenceIndex, const Event *parentEvent, JsonChildObject &parentObject, std::map< const Event *, std::vector< const Event *>> descendantsMap)
Definition: Profiling.cpp:303
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ FakeQuantization()

void armnn::FakeQuantization ( const float *  inputData,
float *  outputData,
uint32_t  numElements,
float  min,
float  max 
)

Definition at line 17 of file RefFakeQuantizationFloat32Workload.cpp.

References numeric_cast().

Referenced by TEST_SUITE().

18 {
19  float scale = (max - min) / 255.f;
20  int32_t offset = armnn::numeric_cast<int32_t>((-min * 255.f) / (max - min));
21 
22  for (uint32_t i = 0; i < numElements; i++)
23  {
24  outputData[i] = static_cast<float>(armnn::Quantize<uint8_t>(inputData[i], scale, offset));
25  }
26 
27 }
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
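
A worked example of the scale and offset derivation above (the min/max range is assumed for illustration):

    float min = -1.0f, max = 1.0f;
    float   scale  = (max - min) / 255.f;                                 // ~0.00784
    int32_t offset = static_cast<int32_t>((-min * 255.f) / (max - min));  // 127
    // An input of 0.0f quantizes to level 127 and is written back as 127.0f,
    // i.e. the output holds the quantized levels re-expressed as floats.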

◆ FalseFunc()

bool armnn::FalseFunc ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 62 of file LayerSupportCommon.hpp.

References IgnoreUnused().

63 {
64  IgnoreUnused(reasonIfUnsupported);
65  IgnoreUnused(params...);
66  return false;
67 }
void IgnoreUnused(Ts &&...)

◆ FalseFuncF16()

bool armnn::FalseFuncF16 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 70 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

71 {
72  IgnoreUnused(params...);
73  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type");
74  return false;
75 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseFuncF32()

bool armnn::FalseFuncF32 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 78 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

79 {
80  IgnoreUnused(params...);
81  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type");
82  return false;
83 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseFuncI32()

bool armnn::FalseFuncI32 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 94 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

95 {
96  IgnoreUnused(params...);
97  SetValueChecked(reasonIfUnsupported, "Layer is not supported with int32 data type");
98  return false;
99 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseFuncU8()

bool armnn::FalseFuncU8 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 86 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

87 {
88  IgnoreUnused(params...);
89  SetValueChecked(reasonIfUnsupported, "Layer is not supported with 8-bit data type");
90  return false;
91 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseInputFuncF16()

bool armnn::FalseInputFuncF16 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 110 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

111 {
112  IgnoreUnused(params...);
113  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type input");
114  return false;
115 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseInputFuncF32()

bool armnn::FalseInputFuncF32 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 102 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

103 {
104  IgnoreUnused(params...);
105  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type input");
106  return false;
107 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseOutputFuncF16()

bool armnn::FalseOutputFuncF16 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 126 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

127 {
128  IgnoreUnused(params...);
129  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float16 data type output");
130  return false;
131 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ FalseOutputFuncF32()

bool armnn::FalseOutputFuncF32 ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 118 of file LayerSupportCommon.hpp.

References IgnoreUnused(), and SetValueChecked().

119 {
120  IgnoreUnused(params...);
121  SetValueChecked(reasonIfUnsupported, "Layer is not supported with float32 data type output");
122  return false;
123 }
void IgnoreUnused(Ts &&...)
void SetValueChecked(Optional< T &> optionalRef, V &&val)

◆ Fill()

void Fill ( Encoder< float > &  output,
const TensorShape &  desiredOutputShape,
const float  value 
)

Creates a tensor and fills it with a scalar value.

Definition at line 13 of file Fill.cpp.

References TensorShape::GetNumElements(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

16 {
17  for(unsigned int i = 0; i < desiredOutputShape.GetNumElements(); ++i)
18  {
19  output[i];
20  output.Set(value);
21  }
22 }
virtual void Set(IType right)=0

◆ FindKernelMeasurements()

std::vector<Measurement> armnn::FindKernelMeasurements ( const Event *  event)

Definition at line 62 of file Profiling.cpp.

References ARMNN_ASSERT, and Event::GetMeasurements().

63 {
64  ARMNN_ASSERT(event != nullptr);
65 
66  std::vector<Measurement> measurements;
67 
68  // Search through the measurements.
69  for (const auto& measurement : event->GetMeasurements())
70  {
71  if (measurement.m_Name.rfind("OpenClKernelTimer", 0) == 0
72  || measurement.m_Name.rfind("NeonKernelTimer", 0) == 0)
73  {
74  // Measurement found.
75  measurements.push_back(measurement);
76  }
77  }
78 
79  return measurements;
80 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ FindMeasurement()

Measurement armnn::FindMeasurement ( const std::string &  name,
const Event *  event 
)

Definition at line 43 of file Profiling.cpp.

References ARMNN_ASSERT, and Event::GetMeasurements().

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults(), and ProfilerImpl::CalculateProfilingEventStats().

44 {
45 
46  ARMNN_ASSERT(event != nullptr);
47 
48  // Search through the measurements.
49  for (const auto& measurement : event->GetMeasurements())
50  {
51  if (measurement.m_Name == name)
52  {
53  // Measurement found.
54  return measurement;
55  }
56  }
57 
58  // Measurement not found.
59  return Measurement{ "", 0.f, Measurement::Unit::TIME_MS };
60 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ FinishClContextBuffer()

void armnn::FinishClContextBuffer ( flatbuffers::FlatBufferBuilder &  fbb,
flatbuffers::Offset< armnn::ClContext >  root 
)
inline

Definition at line 171 of file ClContextSchema_generated.h.

References ClContextIdentifier().

173  {
174  fbb.Finish(root, ClContextIdentifier());
175 }
const char * ClContextIdentifier()

◆ FinishSizePrefixedClContextBuffer()

void armnn::FinishSizePrefixedClContextBuffer ( flatbuffers::FlatBufferBuilder &  fbb,
flatbuffers::Offset< armnn::ClContext >  root 
)
inline

Definition at line 177 of file ClContextSchema_generated.h.

References ClContextIdentifier().

179  {
180  fbb.FinishSizePrefixed(root, ClContextIdentifier());
181 }
const char * ClContextIdentifier()

◆ ForEachLayerInput()

void armnn::ForEachLayerInput ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo,
Delegate  function 
)

Definition at line 267 of file SubgraphViewSelector.cpp.

References ARMNN_ASSERT_MSG, and Layer::GetInputSlots().

Referenced by AssignSplitId(), and IsReadyForSplitAssignment().

270 {
271  Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
272 
273  for (auto inputSlot : layer.GetInputSlots())
274  {
275  auto connectedInput = PolymorphicDowncast<OutputSlot*>(inputSlot.GetConnection());
276  ARMNN_ASSERT_MSG(connectedInput, "Dangling input slot detected.");
277  Layer& inputLayer = connectedInput->GetOwningLayer();
278 
279  auto parentInfo = layerInfos.find(&inputLayer);
280  if (parentInfo != layerInfos.end())
281  {
282  function(parentInfo->second);
283  }
284  }
285 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ ForEachLayerOutput()

void armnn::ForEachLayerOutput ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo,
Delegate  function 
)

Definition at line 288 of file SubgraphViewSelector.cpp.

References Layer::GetOutputSlots().

Referenced by SubgraphViewSelector::SelectSubgraphs().

291 {
292  Layer& layer = *PolymorphicDowncast<Layer*>(layerInfo.m_Layer);
293 
294  for (auto& outputSlot : layer.GetOutputSlots())
295  {
296  for (auto& output : outputSlot.GetConnections())
297  {
298  Layer& childLayer = output->GetOwningLayer();
299 
300  auto childInfo = layerInfos.find(&childLayer);
301  if (childInfo != layerInfos.end())
302  {
303  function(childInfo->second);
304  }
305  }
306  }
307 }

◆ FullyConnected()

void FullyConnected ( const TensorShape &  rInputShape,
Decoder< float > &  rInputDecoder,
const TensorShape &  rOutputShape,
Encoder< float > &  rOutputEncoder,
const TensorShape &  rWeightsShape,
Decoder< float > &  rWeightDecoder,
Decoder< float > *  pBiasDecoder,
const bool  biasEnabled,
const unsigned int  K,
const bool  transposeWeights 
)

Performs a matrix multiplication and optionally adds a bias.

Definition at line 15 of file FullyConnected.cpp.

References ARMNN_ASSERT, Decoder< IType >::DecodeTensor(), and Encoder< IType >::Set().

25 {
26  // Perform FullyConnected implementation
27  unsigned int outputSize = rOutputShape[1];
28 
29  const std::vector<float> decodedInputs = rInputDecoder.DecodeTensor(rInputShape);
30  const std::vector<float> decodedWeights = rWeightDecoder.DecodeTensor(rWeightsShape);
31 
32  const TensorShape biasShape{outputSize};
33 
34  ARMNN_ASSERT(!biasEnabled || pBiasDecoder != nullptr);
35  const std::vector<float> decodedBiases = biasEnabled ? pBiasDecoder->DecodeTensor(biasShape) : std::vector<float>();
36 
37 
38  for (unsigned int n = 0; n < rInputShape[0]; n++)
39  {
40  for (unsigned int channelOutput = 0; channelOutput < outputSize; channelOutput++)
41  {
42  float outval = 0.f;
43 
44  for (unsigned int channelInput = 0; channelInput < K; channelInput++)
45  {
46  float weight;
47  if (transposeWeights)
48  {
49  weight = decodedWeights[channelOutput * K + channelInput];
50  }
51  else
52  {
53  weight = decodedWeights[channelInput * outputSize + channelOutput];
54  }
55 
56  outval += weight * decodedInputs[n * K + channelInput];
57  }
58 
59  if (biasEnabled)
60  {
61  outval += decodedBiases[channelOutput];
62  }
63 
64  rOutputEncoder[n * outputSize + channelOutput];
65  rOutputEncoder.Set(outval);
66  }
67  }
68 }
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
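
For intuition, the indexing above amounts to the following (the shapes are assumed for illustration):

    // rInputShape   = [1, 3]  (N = 1, K = 3)
    // rOutputShape  = [1, 2]  (outputSize = 2)
    // rWeightsShape = [2, 3]  when transposeWeights == true
    //
    // transposeWeights == true:  weight(o, k) = decodedWeights[o * K + k]
    // transposeWeights == false: weight(o, k) = decodedWeights[k * outputSize + o]
    //
    // output[n][o] = sum_k weight(o, k) * input[n][k]  (+ bias[o] if biasEnabled)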

◆ FuseAdditionLayer()

LayerType* armnn::FuseAdditionLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 116 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

121 {
122  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddAdditionLayer(name.c_str());
123  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
124 
125  FuseLayer(optimizationViews,
126  baseLayer,
127  replacementLayer,
128  activationLayer,
129  activationDesc);
130 
131  return replacementLayer;
132 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseBatchNormalizationLayer()

LayerType* armnn::FuseBatchNormalizationLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 192 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

197 {
198  IConnectableLayer* replacement =
199  optimizationViews.GetINetwork()->AddBatchNormalizationLayer(baseLayer->GetParameters(),
200  ConstTensor(),
201  ConstTensor(),
202  ConstTensor(),
203  ConstTensor(),
204  name.c_str());
205  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
206 
207  FuseLayer(optimizationViews,
208  baseLayer,
209  replacementLayer,
210  activationLayer,
211  activationDesc);
212 
213  SubgraphView substitutionSubgraph({baseLayer, activationLayer},
214  CreateIInputsFrom({baseLayer}),
215  CreateIOutputsFrom({activationLayer}));
216  SubgraphView replacementSubgraph(replacementLayer);
217 
218  return replacementLayer;
219 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseConvolution2dLayer()

LayerType* armnn::FuseConvolution2dLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 222 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

227 {
228  IConnectableLayer* replacement = optimizationViews.GetINetwork()
229  ->AddConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
230 
231  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
232 
233  replacementLayer->m_Weight = std::move(baseLayer->m_Weight);
234  replacementLayer->m_Bias = std::move(baseLayer->m_Bias);
235 
236  FuseLayer(optimizationViews,
237  baseLayer,
238  replacementLayer,
239  activationLayer,
240  activationDesc);
241 
242  return replacementLayer;
243 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseDepthwiseConvolution2dLayer()

LayerType* armnn::FuseDepthwiseConvolution2dLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 246 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

251 {
252  IConnectableLayer* replacement =
253  optimizationViews.GetINetwork()->AddDepthwiseConvolution2dLayer(baseLayer->GetParameters(), name.c_str());
254 
255  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
256 
257  replacementLayer->m_Weight = std::move(baseLayer->m_Weight);
258  replacementLayer->m_Bias = std::move(baseLayer->m_Bias);
259 
260  FuseLayer(optimizationViews,
261  baseLayer,
262  replacementLayer,
263  activationLayer,
264  activationDesc);
265 
266  return replacementLayer;
267 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseDivisionLayer()

LayerType* armnn::FuseDivisionLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 154 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

159 {
160  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddDivisionLayer(name.c_str());
161  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
162 
163  FuseLayer(optimizationViews,
164  baseLayer,
165  replacementLayer,
166  activationLayer,
167  activationDesc);
168 
169  return replacementLayer;
170 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseFullyConnectedLayer()

LayerType* armnn::FuseFullyConnectedLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 270 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

275 {
276  IConnectableLayer* replacement =
277  optimizationViews.GetINetwork()->AddFullyConnectedLayer(baseLayer->GetParameters(),
278  name.c_str());
279  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
280 
281  FuseLayer(optimizationViews,
282  baseLayer,
283  replacementLayer,
284  activationLayer,
285  activationDesc);
286 
287  replacementLayer->m_Weight = std::move(baseLayer->m_Weight);
288  replacementLayer->m_Bias = std::move(baseLayer->m_Bias);
289 
290  return replacementLayer;
291 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseLayer()

LayerType* armnn::FuseLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
LayerType replacementLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc 
)

Definition at line 96 of file ArmComputeSubgraphUtils.hpp.

References OptimizationViews::AddSubstitution().

Referenced by FuseAdditionLayer(), FuseBatchNormalizationLayer(), FuseConvolution2dLayer(), FuseDepthwiseConvolution2dLayer(), FuseDivisionLayer(), FuseFullyConnectedLayer(), FuseMultiplicationLayer(), and FuseSubtractionLayer().

101 {
102  replacementLayer->SetAdditionalInfoForObject(
103  std::make_shared<ActivationDescriptor>(activationDesc));
104 
105  SubgraphView substitutionSubgraph({baseLayer, activationLayer},
106  CreateIInputsFrom({baseLayer}),
107  CreateIOutputsFrom({activationLayer}));
108  SubgraphView replacementSubgraph(replacementLayer);
109 
110  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
111 
112  return replacementLayer;
113 }

◆ FuseMultiplicationLayer()

LayerType* armnn::FuseMultiplicationLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 173 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

178 {
179  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddMultiplicationLayer(name.c_str());
180  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
181 
182  FuseLayer(optimizationViews,
183  baseLayer,
184  replacementLayer,
185  activationLayer,
186  activationDesc);
187 
188  return replacementLayer;
189 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ FuseSubtractionLayer()

LayerType* armnn::FuseSubtractionLayer ( OptimizationViews optimizationViews,
LayerType baseLayer,
ActivationLayer activationLayer,
ActivationDescriptor activationDesc,
std::string  name 
)

Definition at line 135 of file ArmComputeSubgraphUtils.hpp.

References FuseLayer(), and OptimizationViews::GetINetwork().

140 {
141  IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddSubtractionLayer(name.c_str());
142  LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
143 
144  FuseLayer(optimizationViews,
145  baseLayer,
146  replacementLayer,
147  activationLayer,
148  activationDesc);
149 
150  return replacementLayer;
151 }
LayerType * FuseLayer(OptimizationViews &optimizationViews, LayerType *baseLayer, LayerType *replacementLayer, ActivationLayer *activationLayer, ActivationDescriptor &activationDesc)
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ Gather()

void Gather ( const TensorInfo &  paramsInfo,
const TensorInfo &  indicesInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  params,
const int32_t *  indices,
Encoder< float > &  output,
const int32_t  axis 
)

Definition at line 17 of file Gather.cpp.

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), IgnoreUnused(), numeric_cast(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

24 {
25  IgnoreUnused(outputInfo);
26  IgnoreUnused(axis);
27 
28  const TensorShape& paramsShape = paramsInfo.GetShape();
29 
30  unsigned int paramsProduct = 1;
31  for (unsigned int i = 1; i < paramsInfo.GetNumDimensions(); ++i)
32  {
33  paramsProduct = paramsProduct * paramsShape[i];
34  }
35 
36  unsigned int outIndex = 0;
37  for (unsigned int i = 0; i < indicesInfo.GetNumElements(); ++i)
38  {
39  unsigned int indx = armnn::numeric_cast<unsigned int>(indices[i]);
40 
41  ARMNN_ASSERT(indices[i] >= 0 && indx < paramsShape[0]);
42 
43  unsigned int startOffset = indx * paramsProduct;
44  unsigned int endOffset = startOffset + paramsProduct;
45 
46  for (unsigned int j = startOffset; j < endOffset; ++j)
47  {
48  params[j];
49  float outputValue = params.Get();
50  output[outIndex];
51  output.Set(outputValue);
52  ++outIndex;
53  }
54  }
55 
56  ARMNN_ASSERT(outIndex == outputInfo.GetNumElements());
57 }
virtual void Set(IType right)=0
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
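
For intuition, the implementation gathers whole slices along dimension 0 of params (data assumed for illustration):

    // params (shape [3, 2]):  [[1, 2], [3, 4], [5, 6]]
    // indices:                [2, 0]
    // output (shape [2, 2]):  [[5, 6], [1, 2]]
    //
    // paramsProduct above is 2, so index i selects the flattened
    // element range [2 * i, 2 * i + 2) of params.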

◆ GatherTensorHandlePairs()

void armnn::GatherTensorHandlePairs ( const DescriptorType &  descriptor,
std::vector< std::pair< SrcTensorHandleType *, DstTensorHandleType *>> &  tensorHandlePairs 
)

Definition at line 189 of file WorkloadUtils.hpp.

References CalculateGatherNdKeyIndices(), Convert1HWOTensorInfoToAcl(), Convert1HWOTensorToAcl(), Convert1HWOtoMIHW(), ConvertMaskToACLFormat(), ConvertWeightTensorFromArmnnToAcl(), ConvertWeightTensorInfoFromArmnnToAcl(), PermuteTensor(), and ReshapeWeightsForAcl().

Referenced by CopyMemGenericWorkload::CopyMemGenericWorkload(), CopyMemGenericWorkload::ExecuteAsync(), NeonConvertBf16ToFp32Workload::NeonConvertBf16ToFp32Workload(), NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(), NeonConvertFp32ToBf16Workload::NeonConvertFp32ToBf16Workload(), and NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload().

191 {
192  const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
193  tensorHandlePairs.reserve(numInputs);
194 
195  for (unsigned int i = 0; i < numInputs; ++i)
196  {
197  SrcTensorHandleType* const srcTensorHandle =
198  PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
199  DstTensorHandleType* const dstTensorHandle =
200  PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);
201 
202  tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
203  }
204 }

◆ GenerateRangeK()

std::vector<unsigned int> armnn::GenerateRangeK ( unsigned int  k)

Definition at line 17 of file DetectionPostProcess.cpp.

Referenced by DetectionPostProcess(), and NonMaxSuppression().

18 {
19  std::vector<unsigned int> range(k);
20  std::iota(range.begin(), range.end(), 0);
21  return range;
22 }

◆ GetActivationFunctionAsCString()

constexpr char const* armnn::GetActivationFunctionAsCString ( ActivationFunction  activation)

Definition at line 27 of file TypesUtils.hpp.

References Abs, BoundedReLu, Elu, HardSwish, LeakyReLu, Linear, ReLu, Sigmoid, SoftReLu, Sqrt, Square, and TanH.

Referenced by StringifyLayerParameters< ActivationDescriptor >::Serialize().

28 {
29  switch (activation)
30  {
31  case ActivationFunction::Sigmoid: return "Sigmoid";
32  case ActivationFunction::TanH: return "TanH";
33  case ActivationFunction::Linear: return "Linear";
34  case ActivationFunction::ReLu: return "ReLu";
35  case ActivationFunction::BoundedReLu: return "BoundedReLu";
36  case ActivationFunction::SoftReLu: return "SoftReLu";
37  case ActivationFunction::LeakyReLu: return "LeakyReLu";
38  case ActivationFunction::Abs: return "Abs";
39  case ActivationFunction::Sqrt: return "Sqrt";
40  case ActivationFunction::Square: return "Square";
41  case ActivationFunction::Elu: return "Elu";
42  case ActivationFunction::HardSwish: return "HardSwish";
43  default: return "Unknown";
44  }
45 }

◆ GetArgMinMaxFunctionAsCString()

constexpr char const* armnn::GetArgMinMaxFunctionAsCString ( ArgMinMaxFunction  function)

Definition at line 47 of file TypesUtils.hpp.

References Max, and Min.

48 {
49  switch (function)
50  {
51  case ArgMinMaxFunction::Max: return "Max";
52  case ArgMinMaxFunction::Min: return "Min";
53  default: return "Unknown";
54  }
55 }

◆ GetBiasDataType()

DataType GetBiasDataType ( DataType  inputDataType)

Definition at line 27 of file WorkloadData.cpp.

References ARMNN_ASSERT_MSG, ARMNN_LOG, BFloat16, CHECK_LOCATION, TensorInfo::GetDataType(), GetDataTypeName(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetQuantizationDim(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::GetQuantizationScales(), TensorInfo::GetShape(), OptionalBase::has_value(), TensorInfo::HasMultipleQuantizationScales(), TensorInfo::HasPerAxisQuantization(), info, TensorInfo::IsQuantized(), IsQuantized8BitType(), TensorInfo::IsTypeSpaceMatch(), WorkloadInfo::m_InputTensorInfos, WorkloadInfo::m_OutputTensorInfos, OptionalReferenceSwitch< std::is_reference< T >::value, T >::value(), and warning.

Referenced by CompareDepthwiseConvolution2dTestImpl(), TEST_SUITE(), FullyConnectedQueueDescriptor::Validate(), Convolution2dQueueDescriptor::Validate(), Convolution3dQueueDescriptor::Validate(), DepthwiseConvolution2dQueueDescriptor::Validate(), and TransposeConvolution2dQueueDescriptor::Validate().

28 {
29  switch (inputDataType)
30  {
31  case DataType::Float16:
32  return DataType::Float16;
33  case DataType::BFloat16:
34  case DataType::Float32:
35  return DataType::Float32;
36  case DataType::QAsymmS8:
37  return DataType::Signed32;
38  case DataType::QAsymmU8:
39  return DataType::Signed32;
40  case DataType::QSymmS8:
41  return DataType::Signed32;
42  case DataType::QSymmS16:
43  return DataType::Signed32;
44  default:
45  ARMNN_ASSERT_MSG(false, "Invalid input data type");
46  return DataType::Float32;
47  }
48 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
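
Example of the mapping above: quantized 8-bit and 16-bit inputs take 32-bit signed biases.

    armnn::DataType biasType = armnn::GetBiasDataType(armnn::DataType::QAsymmU8);
    // biasType == armnn::DataType::Signed32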

◆ GetBiasTypeFromWeightsType()

armnn::Optional< armnn::DataType > GetBiasTypeFromWeightsType ( armnn::Optional< armnn::DataType >  weightsType)
inline

Definition at line 14 of file LayerSupportRules.hpp.

References ARMNN_ASSERT_MSG, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and OptionalReferenceSwitch< std::is_reference< T >::value, T >::value().

Referenced by BiasAndWeightsTypesCompatible::BiasAndWeightsTypesCompatible(), BiasAndWeightsTypesMatch::BiasAndWeightsTypesMatch(), and FullyConnectedTest().

15 {
16  if (!weightsType)
17  {
18  return weightsType;
19  }
20 
21  switch(weightsType.value())
22  {
23  case armnn::DataType::Float16:
24  case armnn::DataType::Float32:
25  return weightsType;
26  case armnn::DataType::QAsymmS8:
27  case armnn::DataType::QAsymmU8:
28  case armnn::DataType::QSymmS8:
29  case armnn::DataType::QSymmS16:
30  return armnn::DataType::Signed32;
31  default:
32  ARMNN_ASSERT_MSG(false, "GetBiasTypeFromWeightsType(): Unsupported data type.");
33  }
34  return armnn::EmptyOptional();
35 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32

◆ GetCapability() [1/2]

Optional< const BackendOptions::BackendOption > GetCapability ( const std::string &  backendCapabilityName,
const BackendCapabilities &  capabilities 
)

Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not that BackendCapability is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted.

Definition at line 30 of file BackendHelper.cpp.

References BackendOptions::GetOption(), and BackendOptions::GetOptionCount().

Referenced by GetCapability(), HasCapability(), LayerSupportHandle::IsConvolution2dSupported(), LayerSupportHandle::IsDepthwiseConvolutionSupported(), LayerSupportHandle::IsDilatedDepthwiseConvolutionSupported(), LayerSupportHandle::IsFullyConnectedSupported(), and TEST_SUITE().

32 {
33  for (size_t i=0; i < capabilities.GetOptionCount(); i++)
34  {
35  const auto& capability = capabilities.GetOption(i);
36  if (backendCapabilityName == capability.GetName())
37  {
38  return capability;
39  }
40  }
41  return EmptyOptional();
42 }

◆ GetCapability() [2/2]

Optional< const BackendOptions::BackendOption > GetCapability ( const std::string &  backendCapabilityName,
const armnn::BackendId &  backend 
)

Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be inspected to check whether or not that BackendCapability is supported. Otherwise returns an EmptyOptional if the BackendCapability is unlisted.

Definition at line 44 of file BackendHelper.cpp.

References BackendRegistryInstance(), and GetCapability().

46 {
47  auto const& backendRegistry = armnn::BackendRegistryInstance();
48  if (backendRegistry.IsBackendRegistered(backend))
49  {
50  auto factoryFunc = backendRegistry.GetFactory(backend);
51  auto backendObject = factoryFunc();
52  auto capabilities = backendObject->GetCapabilities();
53  return GetCapability(backendCapabilityName, capabilities);
54  }
55  return EmptyOptional();
56 }
Optional< const BackendOptions::BackendOption > GetCapability(const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...
BackendRegistry & BackendRegistryInstance()
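
A usage sketch (the capability name "NonConstWeights" and its boolean value are assumptions for illustration; not every backend lists it):

    auto capability = armnn::GetCapability("NonConstWeights", armnn::BackendId(armnn::Compute::GpuAcc));
    if (capability.has_value() && capability.value().GetValue().AsBool())
    {
        // The backend reports support for non-constant weights.
    }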

◆ GetClContext()

const armnn::ClContext* armnn::GetClContext ( const void *  buf)
inline

Definition at line 140 of file ClContextSchema_generated.h.

Referenced by ClContextDeserializer::DeserializeFromBinary().

140  {
141  return flatbuffers::GetRoot<armnn::ClContext>(buf);
142 }

◆ GetComparisonOperationAsCString()

constexpr char const* armnn::GetComparisonOperationAsCString ( ComparisonOperation  operation)

Definition at line 57 of file TypesUtils.hpp.

References Equal, Greater, GreaterOrEqual, Less, LessOrEqual, and NotEqual.

Referenced by armnnTfLiteParser::ComputeWrappedIndex(), RefComparisonWorkload::ExecuteAsync(), and StringifyLayerParameters< ComparisonDescriptor >::Serialize().

58 {
59  switch (operation)
60  {
61  case ComparisonOperation::Equal: return "Equal";
62  case ComparisonOperation::Greater: return "Greater";
63  case ComparisonOperation::GreaterOrEqual: return "GreaterOrEqual";
64  case ComparisonOperation::Less: return "Less";
65  case ComparisonOperation::LessOrEqual: return "LessOrEqual";
66  case ComparisonOperation::NotEqual: return "NotEqual";
67  default: return "Unknown";
68  }
69 }

◆ GetComputeDeviceAsCString()

constexpr char const* armnn::GetComputeDeviceAsCString ( Compute  compute)

Deprecated function that will be removed together with the Compute enum.

Definition at line 34 of file BackendId.hpp.

References CpuAcc, CpuRef, and GpuAcc.

Referenced by GetSuitableBackendRegistered(), operator<<(), and TEST_SUITE().

35 {
36  switch (compute)
37  {
38  case armnn::Compute::CpuRef: return "CpuRef";
39  case armnn::Compute::CpuAcc: return "CpuAcc";
40  case armnn::Compute::GpuAcc: return "GpuAcc";
41  default: return "Unknown";
42  }
43 }
CPU Execution: Reference C++ kernels.
GPU Execution: OpenCL: ArmCompute.
CPU Execution: NEON: ArmCompute.

◆ GetConvolutionMethodString()

std::string GetConvolutionMethodString ( arm_compute::ConvolutionMethod &  convolutionMethod)
inline

Definition at line 37 of file ClWorkloadUtils.hpp.

38 {
39  switch (convolutionMethod)
40  {
41  case arm_compute::ConvolutionMethod::FFT:
42  return "FFT";
43  case arm_compute::ConvolutionMethod::DIRECT:
44  return "Direct";
45  case arm_compute::ConvolutionMethod::GEMM:
46  return "GEMM";
47  case arm_compute::ConvolutionMethod::WINOGRAD:
48  return "Winograd";
49  default:
50  return "Unknown";
51  }
52 }

◆ GetDataLayoutName()

constexpr const char* armnn::GetDataLayoutName ( DataLayout  dataLayout)

◆ GetDataTypeName()

constexpr const char* armnn::GetDataTypeName ( DataType  dataType)

Definition at line 202 of file TypesUtils.hpp.

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by armnnTfLiteParser::AsFloatArray(), AttemptBackendAssignment(), CompareConstTensor(), ProfilingDetails::DetailsExist(), GetBiasDataType(), TfLiteParserImpl::GetBuffer(), RefTransposeWorkload< DataType >::GetName(), RefPermuteWorkload< DataType >::GetName(), RefDebugWorkload< DataType >::GetName(), armnnUtils::GetPerAxisParams(), TEST_SUITE(), LayerVerifierBase::VerifyConstTensors(), LayerVerifierBase::VerifyNameAndConnections(), and VerifyTensorInfoDataType().

203 {
204  switch (dataType)
205  {
206  case DataType::Float16: return "Float16";
207  case DataType::Float32: return "Float32";
208  case DataType::Signed64: return "Signed64";
209  case DataType::QAsymmU8: return "QAsymmU8";
210  case DataType::QAsymmS8: return "QAsymmS8";
211  case DataType::QSymmS8: return "QSymmS8";
212  case DataType::QSymmS16: return "QSymm16";
213  case DataType::Signed32: return "Signed32";
214  case DataType::Boolean: return "Boolean";
215  case DataType::BFloat16: return "BFloat16";
216 
217  default:
218  return "Unknown";
219  }
220 }

◆ GetDataTypeSize()

constexpr unsigned int armnn::GetDataTypeSize ( DataType  dataType)

Definition at line 151 of file TypesUtils.hpp.

References BFloat16, Boolean, Float16, Float32, QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, Signed32, and Signed64.

Referenced by MockTensorHandle::CanBeImported(), RefTensorHandle::CanBeImported(), DepthwiseConvolution2dDepthMul64Test(), RefDepthToSpaceWorkload::ExecuteAsync(), RefStridedSliceWorkload::ExecuteAsync(), RefSliceWorkload::ExecuteAsync(), RefShapeWorkload::ExecuteAsync(), IDeserializer::DeserializerImpl::GetNetworkOutputBindingInfo(), TensorInfo::GetNumBytes(), GetUnpaddedTensorStrides(), PermuteTensor(), ConvertConstPermuteLayersToConstLayers::Run(), and TEST_SUITE().

152 {
153  switch (dataType)
154  {
155  case DataType::BFloat16:
156  case DataType::Float16: return 2U;
157  case DataType::Float32:
158  case DataType::Signed32: return 4U;
159  case DataType::Signed64: return 8U;
160  case DataType::QAsymmU8: return 1U;
161  case DataType::QAsymmS8: return 1U;
162  case DataType::QSymmS8: return 1U;
163  case DataType::QSymmS16: return 2U;
164  case DataType::Boolean: return 1U;
165  default: return 0U;
166  }
167 }

◆ GetEventPtr() [1/2]

const Event* armnn::GetEventPtr ( const Event ptr)

Definition at line 109 of file Profiling.cpp.

Referenced by ProfilerImpl::AnalyzeEventSequenceAndWriteResults().

109 { return ptr;}

◆ GetEventPtr() [2/2]

const Event* armnn::GetEventPtr ( const std::unique_ptr< Event > &  ptr)

Definition at line 110 of file Profiling.cpp.

110 {return ptr.get(); }

◆ GetGraphForTesting()

Graph & GetGraphForTesting ( IOptimizedNetwork *  optNet)

Definition at line 49 of file TestUtils.cpp.

References IOptimizedNetwork::pOptimizedNetworkImpl.

Referenced by CheckRelatedLayers(), and TEST_SUITE().

50 {
51  return optNet->pOptimizedNetworkImpl->GetGraph();
52 }
std::unique_ptr< OptimizedNetworkImpl > pOptimizedNetworkImpl
Definition: INetwork.hpp:842

◆ GetILayerSupportByBackendId()

LayerSupportHandle GetILayerSupportByBackendId ( const armnn::BackendId &  backend)

Convenience function to retrieve the ILayerSupportHandle for a backend.

Definition at line 16 of file BackendHelper.cpp.

References BackendRegistryInstance(), BackendRegistry::GetFactory(), and BackendRegistry::IsBackendRegistered().

Referenced by TEST_SUITE().

17 {
18  BackendRegistry& backendRegistry = armnn::BackendRegistryInstance();
19 
20  if (!backendRegistry.IsBackendRegistered(backend))
21  {
22  return LayerSupportHandle(nullptr);
23  }
24 
25  auto factoryFunc = backendRegistry.GetFactory(backend);
26  auto backendObject = factoryFunc();
27  return LayerSupportHandle(backendObject->GetLayerSupport(), backend);
28 }
BackendRegistry & BackendRegistryInstance()
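
A usage sketch (the tensor shape and the queried layer type are illustrative assumptions):

    armnn::LayerSupportHandle handle = armnn::GetILayerSupportByBackendId(armnn::Compute::CpuRef);
    armnn::TensorInfo info({ 1, 4 }, armnn::DataType::Float32);
    std::string reason;
    bool supported = handle.IsAdditionSupported(info, info, info, armnn::Optional<std::string&>(reason));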

◆ GetInputTensor()

const armnn::ConstTensor armnn::GetInputTensor ( const LayerBindingId  layerId,
const InputTensors &  inputTensors 
)

Definition at line 1309 of file LoadedNetwork.cpp.

1310 {
1311  for (auto inputTensorPair : inputTensors)
1312  {
1313  LayerBindingId id = inputTensorPair.first;
1314  if (id == layerId)
1315  {
1316  return inputTensorPair.second;
1317  }
1318  }
1319  throw InvalidArgumentException("Input does not exist.");
1320 }
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:290

◆ GetInputTensorData()

const DataType* armnn::GetInputTensorData ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 35 of file RefWorkloadUtils.hpp.

References GetOutputTensorData(), and ITensorHandle::Map().

36 {
37  const ITensorHandle* tensorHandle = data.m_Inputs[idx];
38  return reinterpret_cast<const DataType*>(tensorHandle->Map());
39 }
DataType
Definition: Types.hpp:48

◆ GetInputTensorDataBFloat16()

const BFloat16* armnn::GetInputTensorDataBFloat16 ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 79 of file RefWorkloadUtils.hpp.

80 {
81  return GetInputTensorData<BFloat16>(idx, data);
82 }

◆ GetInputTensorDataFloat()

const float* armnn::GetInputTensorDataFloat ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 55 of file RefWorkloadUtils.hpp.

56 {
57  return GetInputTensorData<float>(idx, data);
58 }

◆ GetInputTensorDataHalf()

const Half* armnn::GetInputTensorDataHalf ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 67 of file RefWorkloadUtils.hpp.

68 {
69  return GetInputTensorData<Half>(idx, data);
70 }

◆ GetLayerTypeAsCString()

char const * GetLayerTypeAsCString ( LayerType  type)

Definition at line 13 of file InternalTypes.cpp.

References ARMNN_ASSERT_MSG, and LIST_OF_LAYER_TYPE.

Referenced by AttemptBackendAssignment(), CheckScaleSetOnQuantizedType(), Connect(), TestInputLayerVisitor::ExecuteStrategy(), TestConvolution2dLayerVisitor::ExecuteStrategy(), StrategyBase< NoThrowStrategy >::ExecuteStrategy(), TestOutputLayerVisitor::ExecuteStrategy(), TestDepthwiseConvolution2dLayerVisitor::ExecuteStrategy(), TestFullyConnectedLayerVistor::ExecuteStrategy(), TestBatchNormalizationLayerVisitor::ExecuteStrategy(), TestConstantLayerVisitor::ExecuteStrategy(), TestLstmLayerVisitor::ExecuteStrategy(), TestQLstmLayerVisitor::ExecuteStrategy(), TestQuantizedLstmLayerVisitor::ExecuteStrategy(), ElementwiseBaseLayer::InferOutputShapes(), Layer::InferOutputShapes(), Graph::InferTensorInfos(), Graph::Print(), ReturnWithError(), Layer::SerializeLayerParameters(), Graph::SerializeToDot(), TEST_SUITE(), ElementwiseBaseLayer::ValidateTensorShapesFromInputs(), ElementwiseUnaryLayer::ValidateTensorShapesFromInputs(), Graph::VerifyConstantLayerSetTensorInfo(), and Layer::VerifyLayerConnections().

14 {
15  switch (type)
16  {
17 #define X(name) case LayerType::name: return #name;
18  LIST_OF_LAYER_TYPE
19 #undef X
20  default:
21  ARMNN_ASSERT_MSG(false, "Unknown layer type");
22  return "Unknown";
23  }
24 }
#define LIST_OF_LAYER_TYPE
This list uses X macro technique.
Definition: Types.hpp:388
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ GetLogicalBinaryOperationAsCString()

constexpr char const* armnn::GetLogicalBinaryOperationAsCString ( LogicalBinaryOperation  operation)

Definition at line 87 of file TypesUtils.hpp.

References LogicalAnd, and LogicalOr.

Referenced by RefLogicalBinaryWorkload::ExecuteAsync().

88 {
89  switch (operation)
90  {
91  case LogicalBinaryOperation::LogicalAnd: return "LogicalAnd";
92  case LogicalBinaryOperation::LogicalOr: return "LogicalOr";
93  default: return "Unknown";
94  }
95 }

◆ GetMemBlockStrategyTypeName()

constexpr const char* armnn::GetMemBlockStrategyTypeName ( MemBlockStrategyType  memBlockStrategyType)

Definition at line 264 of file TypesUtils.hpp.

References MultiAxisPacking, and SingleAxisPacking.

Referenced by RuntimeImpl::RuntimeImpl().

265 {
266  switch (memBlockStrategyType)
267  {
268  case MemBlockStrategyType::SingleAxisPacking: return "SingleAxisPacking";
269  case MemBlockStrategyType::MultiAxisPacking: return "MultiAxisPacking";
270  default: return "Unknown";
271  }
272 }

◆ GetMemoryOptimizerStrategy()

std::unique_ptr<IMemoryOptimizerStrategy> armnn::GetMemoryOptimizerStrategy ( const std::string &  strategyName)

Definition at line 36 of file MemoryOptimizerStrategyLibrary.hpp.

Referenced by main(), RuntimeImpl::RuntimeImpl(), and TEST_SUITE().

37 {
38  const auto& strategyFactoryMap = GetStrategyFactories();
39  auto strategyFactory = strategyFactoryMap.find(strategyName);
40  if (strategyFactory != GetStrategyFactories().end())
41  {
42  return strategyFactory->second->CreateMemoryOptimizerStrategy();
43  }
44  return nullptr;
45 }

◆ GetMemoryOptimizerStrategyNames()

const std::vector<std::string> armnn::GetMemoryOptimizerStrategyNames ( )

Definition at line 47 of file MemoryOptimizerStrategyLibrary.hpp.

Referenced by ParseOptions(), and TEST_SUITE().

48 {
49  const auto& strategyFactoryMap = GetStrategyFactories();
50  std::vector<std::string> strategyNames;
51  for (const auto& strategyFactory : strategyFactoryMap)
52  {
53  strategyNames.emplace_back(strategyFactory.first);
54  }
55  return strategyNames;
56 }
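
A hedged sketch combining the two helpers above. Both live in the internal MemoryOptimizerStrategyLibrary.hpp header, so the include path is indicative only, and "ConstantMemoryStrategy" is used purely as an example name rather than a guaranteed registration:

#include "MemoryOptimizerStrategyLibrary.hpp" // internal backendsCommon header
#include <iostream>
#include <string>

void ListAndPickStrategy()
{
    // Enumerate every strategy registered with the factory map.
    for (const std::string& name : armnn::GetMemoryOptimizerStrategyNames())
    {
        std::cout << "Registered strategy: " << name << "\n";
    }

    // Request one by name; nullptr is returned for unknown names.
    auto strategy = armnn::GetMemoryOptimizerStrategy("ConstantMemoryStrategy");
    if (strategy == nullptr)
    {
        std::cout << "Strategy not registered\n";
    }
}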

◆ GetModelOptionsForTesting()

ModelOptions & GetModelOptionsForTesting ( IOptimizedNetwork optNet)

Definition at line 54 of file TestUtils.cpp.

References IOptimizedNetwork::pOptimizedNetworkImpl.

Referenced by CheckRelatedLayers(), and TEST_SUITE().

55 {
56  return optNet->pOptimizedNetworkImpl->GetModelOptions();
57 }
std::unique_ptr< OptimizedNetworkImpl > pOptimizedNetworkImpl
Definition: INetwork.hpp:842

◆ GetNormalizationAlgorithmChannelAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmChannelAsCString ( NormalizationAlgorithmChannel  channel)

Definition at line 234 of file TypesUtils.hpp.

References Across, and Within.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

235 {
236  switch (channel)
237  {
238  case NormalizationAlgorithmChannel::Across: return "Across";
239  case NormalizationAlgorithmChannel::Within: return "Within";
240  default: return "Unknown";
241  }
242 }

◆ GetNormalizationAlgorithmMethodAsCString()

constexpr const char* armnn::GetNormalizationAlgorithmMethodAsCString ( NormalizationAlgorithmMethod  method)

Definition at line 244 of file TypesUtils.hpp.

References LocalBrightness, and LocalContrast.

Referenced by StringifyLayerParameters< NormalizationDescriptor >::Serialize().

245 {
246  switch (method)
247  {
248  case NormalizationAlgorithmMethod::LocalBrightness: return "LocalBrightness";
249  case NormalizationAlgorithmMethod::LocalContrast: return "LocalContrast";
250  default: return "Unknown";
251  }
252 }

◆ GetNumActivations()

unsigned int armnn::GetNumActivations ( const TensorInfo inputInfo)

Definition at line 16 of file RefFullyConnectedWorkload.cpp.

References TensorInfo::GetNumDimensions(), and TensorInfo::GetShape().

17 {
18  unsigned int numActivations = 1; // Total number of activations in the input.
19  for (unsigned int i = 1; i < inputInfo.GetNumDimensions(); i++)
20  {
21  numActivations *= inputInfo.GetShape()[i];
22  }
23  return numActivations;
24 }

◆ GetNumberOfCacheFiles()

unsigned int GetNumberOfCacheFiles ( const armnn::BackendId backend)

Returns the number of cached files if the backend supports caching.

Definition at line 129 of file BackendHelper.cpp.

References BackendRegistryInstance().

130 {
131  auto const& backendRegistry = armnn::BackendRegistryInstance();
132  if (backendRegistry.IsBackendRegistered(backend))
133  {
134  auto factoryFunc = backendRegistry.GetFactory(backend);
135  auto backendObject = factoryFunc();
136  return backendObject->GetNumberOfCacheFiles();
137  }
138  return 0;
139 }
BackendRegistry & BackendRegistryInstance()
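
A minimal sketch of querying cache-file support before enabling model caching. The GpuAcc backend id is an illustrative choice; the call returns 0 for backends that are not registered or do not support caching:

#include <armnn/BackendHelper.hpp>
#include <armnn/BackendId.hpp>
#include <iostream>

void QueryCacheFileSupport()
{
    unsigned int numFiles = armnn::GetNumberOfCacheFiles(armnn::BackendId("GpuAcc"));
    std::cout << "Backend supports " << numFiles << " cache file(s)\n";
}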

◆ GetNumInputs()

◆ GetOffset()

unsigned int armnn::GetOffset ( const TensorShape shape,
unsigned int  b,
unsigned int  h,
unsigned int  w,
unsigned int  c,
const DataLayoutIndexed dataLayout 
)

Definition at line 15 of file SpaceToBatchNd.cpp.

References DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), and NHWC.

Referenced by SpaceToBatchNd(), and SpaceToDepth().

21 {
22  if (dataLayout.GetDataLayout() == DataLayout::NHWC)
23  {
24  return ((b * shape[dataLayout.GetHeightIndex()] + h) * shape[dataLayout.GetWidthIndex()] + w) *
25  shape[dataLayout.GetChannelsIndex()] + c;
26  }
27  else
28  {
29  return ((b * shape[dataLayout.GetChannelsIndex()] + c) * shape[dataLayout.GetHeightIndex()] + h) *
30  shape[dataLayout.GetWidthIndex()] + w;
31  }
32 }
unsigned int GetWidthIndex() const
unsigned int GetHeightIndex() const
armnn::DataLayout GetDataLayout() const
unsigned int GetChannelsIndex() const
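
As a worked example of the NHWC branch above: for a tensor of shape { N=1, H=2, W=3, C=4 }, the element at (b=0, h=1, w=2, c=3) maps to offset ((0*2 + 1)*3 + 2)*4 + 3 = 23, i.e. the last element of the buffer. The non-NHWC branch applies the same scheme in N, C, H, W order.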

◆ GetOutputShapeRoundingAsCString()

constexpr char const* armnn::GetOutputShapeRoundingAsCString ( OutputShapeRounding  rounding)

Definition at line 108 of file TypesUtils.hpp.

References Ceiling, and Floor.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

109 {
110  switch (rounding)
111  {
112  case OutputShapeRounding::Ceiling: return "Ceiling";
113  case OutputShapeRounding::Floor: return "Floor";
114  default: return "Unknown";
115  }
116 }

◆ GetOutputTensor()

const armnn::Tensor armnn::GetOutputTensor ( const LayerBindingId  layerId,
const OutputTensors outputTensors 
)

Definition at line 1322 of file LoadedNetwork.cpp.

1323 {
1324  for (auto outputTensorPair : outputTensors)
1325  {
1326  LayerBindingId id = outputTensorPair.first;
1327  if (id == layerId)
1328  {
1329  return outputTensorPair.second;
1330  }
1331  }
1332  throw InvalidArgumentException("Output does not exist.");
1333 }
int LayerBindingId
Type of identifiers for bindable layers (inputs, outputs).
Definition: Types.hpp:290

◆ GetOutputTensorData() [1/2]

DataType* armnn::GetOutputTensorData ( ITensorHandle tensorHandle)

Definition at line 49 of file RefWorkloadUtils.hpp.

References ITensorHandle::Map().

50 {
51  return reinterpret_cast<DataType*>(tensorHandle->Map());
52 }
DataType
Definition: Types.hpp:48

◆ GetOutputTensorData() [2/2]

DataType * GetOutputTensorData ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 168 of file ClWorkloadUtils.hpp.

References ITensorHandle::Map().

Referenced by GetInputTensorData(), and SetNeonSliceData().

169 {
170  ITensorHandle* tensorHandle = data.m_Outputs[idx];
171  return reinterpret_cast<DataType*>(tensorHandle->Map());
172 }
DataType
Definition: Types.hpp:48

◆ GetOutputTensorDataBFloat16()

BFloat16* armnn::GetOutputTensorDataBFloat16 ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 85 of file RefWorkloadUtils.hpp.

86 {
87  return GetOutputTensorData<BFloat16>(idx, data);
88 }

◆ GetOutputTensorDataFloat()

float* armnn::GetOutputTensorDataFloat ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 61 of file RefWorkloadUtils.hpp.

62 {
63  return GetOutputTensorData<float>(idx, data);
64 }

◆ GetOutputTensorDataHalf()

Half* armnn::GetOutputTensorDataHalf ( unsigned int  idx,
const PayloadType &  data 
)

Definition at line 73 of file RefWorkloadUtils.hpp.

74 {
75  return GetOutputTensorData<Half>(idx, data);
76 }

◆ GetPaddingMethodAsCString()

constexpr char const* armnn::GetPaddingMethodAsCString ( PaddingMethod  method)

Definition at line 118 of file TypesUtils.hpp.

References Exclude, and IgnoreValue.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

119 {
120  switch (method)
121  {
122  case PaddingMethod::Exclude: return "Exclude";
123  case PaddingMethod::IgnoreValue: return "IgnoreValue";
124  default: return "Unknown";
125  }
126 }

◆ GetPaddingModeAsCString()

constexpr char const* armnn::GetPaddingModeAsCString ( PaddingMode  mode)

Definition at line 128 of file TypesUtils.hpp.

References Constant, Reflect, and Symmetric.

Referenced by StringifyLayerParameters< PadDescriptor >::Serialize().

129 {
130  switch (mode)
131  {
132  case PaddingMode::Constant: return "Constant";
133  case PaddingMode::Symmetric: return "Symmetric";
134  case PaddingMode::Reflect: return "Reflect";
135  default: return "Unknown";
136  }
137 }

◆ GetPoolingAlgorithmAsCString()

constexpr char const* armnn::GetPoolingAlgorithmAsCString ( PoolingAlgorithm  pooling)

Definition at line 97 of file TypesUtils.hpp.

References Average, L2, and Max.

Referenced by StringifyLayerParameters< Pooling2dDescriptor >::Serialize(), and StringifyLayerParameters< Pooling3dDescriptor >::Serialize().

98 {
99  switch (pooling)
100  {
101  case PoolingAlgorithm::Average: return "Average";
102  case PoolingAlgorithm::Max: return "Max";
103  case PoolingAlgorithm::L2: return "L2";
104  default: return "Unknown";
105  }
106 }

◆ GetProfilerEventSequenceSize()

size_t armnn::GetProfilerEventSequenceSize ( armnn::IProfiler profiler)

Definition at line 19 of file ProfilerTests.cpp.

References ProfilerManager::GetInstance(), ProfilerManager::GetProfiler(), and ProfilerManager::RegisterProfiler().

Referenced by TEST_SUITE().

20 {
21  if (!profiler)
22  {
23  return static_cast<size_t>(-1);
24  }
25 
26  return profiler->pProfilerImpl->m_EventSequence.size();
27 }

◆ GetProfilingService()

arm::pipe::IProfilingService & GetProfilingService ( armnn::RuntimeImpl runtime)

Definition at line 59 of file TestUtils.cpp.

Referenced by CheckRelatedLayers(), TEST_SUITE(), and VerifyPostOptimisationStructureTestImpl().

60 {
61  return *(runtime->m_ProfilingService.get());
62 }

◆ GetReduceOperationAsCString()

constexpr char const* armnn::GetReduceOperationAsCString ( ReduceOperation  reduce_operation)

Definition at line 139 of file TypesUtils.hpp.

References Max, Mean, Min, Prod, and Sum.

Referenced by StringifyLayerParameters< ReduceDescriptor >::Serialize().

140 {
141  switch (reduce_operation)
142  {
143  case ReduceOperation::Sum: return "Sum";
144  case ReduceOperation::Max: return "Max";
145  case ReduceOperation::Mean: return "Mean";
146  case ReduceOperation::Min: return "Min";
147  case ReduceOperation::Prod: return "Prod";
148  default: return "Unknown";
149  }
150 }

◆ GetResizeMethodAsCString()

constexpr const char* armnn::GetResizeMethodAsCString ( ResizeMethod  method)

Definition at line 254 of file TypesUtils.hpp.

References Bilinear, and NearestNeighbor.

Referenced by StringifyLayerParameters< ResizeDescriptor >::Serialize().

255 {
256  switch (method)
257  {
258  case ResizeMethod::Bilinear: return "Bilinear";
259  case ResizeMethod::NearestNeighbor: return "NearestNeighbour";
260  default: return "Unknown";
261  }
262 }

◆ GetSizePrefixedClContext()

const armnn::ClContext* armnn::GetSizePrefixedClContext ( const void *  buf)
inline

Definition at line 144 of file ClContextSchema_generated.h.

144  {
145  return flatbuffers::GetSizePrefixedRoot<armnn::ClContext>(buf);
146 }

◆ GetStatusAsCString()

constexpr char const* armnn::GetStatusAsCString ( Status  status)

Definition at line 17 of file TypesUtils.hpp.

References Failure, and Success.

Referenced by operator<<().

18 {
19  switch (status)
20  {
21  case armnn::Status::Success: return "Status::Success";
22  case armnn::Status::Failure: return "Status::Failure";
23  default: return "Unknown";
24  }
25 }

◆ GetTensorInfo()

const TensorInfo& armnn::GetTensorInfo ( const ITensorHandle tensorHandle)
inline

float32 helpers

Definition at line 26 of file RefWorkloadUtils.hpp.

References RefTensorHandle::GetTensorInfo().

Referenced by BatchNormImpl(), Concatenate(), RefGatherNdWorkload::ExecuteAsync(), RefStridedSliceWorkload::ExecuteAsync(), RefDepthToSpaceWorkload::ExecuteAsync(), RefFakeQuantizationFloat32Workload::ExecuteAsync(), RefFillWorkload::ExecuteAsync(), RefChannelShuffleWorkload::ExecuteAsync(), RefSpaceToDepthWorkload::ExecuteAsync(), RefFloorWorkload::ExecuteAsync(), RefConvertBf16ToFp32Workload::ExecuteAsync(), RefConvertFp16ToFp32Workload::ExecuteAsync(), RefLogSoftmaxWorkload::ExecuteAsync(), RefConvertFp32ToBf16Workload::ExecuteAsync(), RefConvertFp32ToFp16Workload::ExecuteAsync(), RefPadWorkload::ExecuteAsync(), RefActivationWorkload::ExecuteAsync(), RefReshapeWorkload::ExecuteAsync(), RefResizeWorkload::ExecuteAsync(), RefSoftmaxWorkload::ExecuteAsync(), RefSpaceToBatchNdWorkload::ExecuteAsync(), RefDepthwiseConvolution2dWorkload::ExecuteAsync(), RefStackWorkload::ExecuteAsync(), RefInstanceNormalizationWorkload::ExecuteAsync(), RefSliceWorkload::ExecuteAsync(), RefDetectionPostProcessWorkload::ExecuteAsync(), RefDequantizeWorkload::ExecuteAsync(), RefArgMinMaxWorkload::ExecuteAsync(), RefPreluWorkload::ExecuteAsync(), RefQuantizeWorkload::ExecuteAsync(), RefBatchNormalizationWorkload::ExecuteAsync(), RefBatchToSpaceNdWorkload::ExecuteAsync(), RefCastWorkload::ExecuteAsync(), RefL2NormalizationWorkload::ExecuteAsync(), RefNormalizationWorkload::ExecuteAsync(), RefReduceWorkload::ExecuteAsync(), RefLstmWorkload::ExecuteAsync(), RefMeanWorkload::ExecuteAsync(), RefPooling2dWorkload::ExecuteAsync(), RefQLstmWorkload::ExecuteAsync(), RefPooling3dWorkload::ExecuteAsync(), RefConvolution2dWorkload::ExecuteAsync(), RefElementwiseUnaryWorkload::ExecuteAsync(), RefConstantWorkload::ExecuteAsync(), RefLogicalBinaryWorkload::ExecuteAsync(), RefLogicalUnaryWorkload::ExecuteAsync(), RefConvolution3dWorkload::ExecuteAsync(), RefComparisonWorkload::ExecuteAsync(), RefGatherWorkload::ExecuteAsync(), RefShapeWorkload::ExecuteAsync(), RefTransposeConvolution2dWorkload::ExecuteAsync(), RefFullyConnectedWorkload::ExecuteAsync(), RefRankWorkload::ExecuteAsync(), RefUnidirectionalSequenceLstmWorkload::ExecuteAsync(), RefPermuteWorkload< DataType >::ExecuteAsync(), RefTransposeWorkload< DataType >::ExecuteAsync(), RefElementwiseWorkload< Functor, ParentDescriptor, DebugString >::ExecuteAsync(), RefDebugWorkload< DataType >::ExecuteAsync(), OutputSlot::GetNumConnections(), OutputSlot::MoveAllConnections(), RefComparisonWorkload::PostAllocationConfigure(), Split(), Splitter(), SwitchLayer::ValidateTensorShapesFromInputs(), DetectionPostProcessLayer::ValidateTensorShapesFromInputs(), SplitterLayer::ValidateTensorShapesFromInputs(), LstmLayer::ValidateTensorShapesFromInputs(), ConcatLayer::ValidateTensorShapesFromInputs(), QuantizedLstmLayer::ValidateTensorShapesFromInputs(), and QLstmLayer::ValidateTensorShapesFromInputs().

27 {
28  // We know that reference workloads use RefTensorHandles for inputs and outputs
29  const RefTensorHandle* refTensorHandle =
30  PolymorphicDowncast<const RefTensorHandle*>(tensorHandle);
31  return refTensorHandle->GetTensorInfo();
32 }

◆ GetTimeDuration()

std::chrono::duration<double, std::milli> armnn::GetTimeDuration ( std::chrono::high_resolution_clock::time_point  start_time)
inline

Definition at line 19 of file Timer.hpp.

References GetTimeNow().

Referenced by CheckInferenceTimeThreshold(), RuntimeImpl::EnqueueWorkload(), RuntimeImpl::Execute(), InferenceModel< IParser, TDataType >::InferenceModel(), MainImpl(), InferenceModel< IParser, TDataType >::Run(), InferenceModel< IParser, TDataType >::RunAsync(), RuntimeImpl::RuntimeImpl(), and RuntimeImpl::~RuntimeImpl().

21 {
22  return std::chrono::duration<double, std::milli>(GetTimeNow() - start_time);
23 }
std::chrono::high_resolution_clock::time_point GetTimeNow()
Definition: Timer.hpp:14
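
A minimal timing sketch pairing GetTimeNow() with GetTimeDuration() to measure wall-clock time in milliseconds. Timer.hpp is an internal src/armnn header, so the include shown is indicative:

#include "Timer.hpp" // declares armnn::GetTimeNow and armnn::GetTimeDuration
#include <iostream>

void TimeSomething()
{
    const auto startTime = armnn::GetTimeNow();

    // ... work to be measured ...

    std::cout << "Took " << armnn::GetTimeDuration(startTime).count() << " ms\n";
}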

◆ GetTimeNow()

std::chrono::high_resolution_clock::time_point armnn::GetTimeNow ( )
inline

Definition at line 14 of file Timer.hpp.

◆ GetUnaryOperationAsCString()

constexpr char const* armnn::GetUnaryOperationAsCString ( UnaryOperation  operation)

Definition at line 71 of file TypesUtils.hpp.

References Abs, Exp, Log, LogicalNot, Neg, Rsqrt, Sin, and Sqrt.

Referenced by armnnTfLiteParser::ComputeWrappedIndex(), RefLogicalUnaryWorkload::ExecuteAsync(), RefElementwiseUnaryWorkload::ExecuteAsync(), StringifyLayerParameters< ElementwiseUnaryDescriptor >::Serialize(), and TEST_SUITE().

72 {
73  switch (operation)
74  {
75  case UnaryOperation::Abs: return "Abs";
76  case UnaryOperation::Exp: return "Exp";
77  case UnaryOperation::Sqrt: return "Sqrt";
78  case UnaryOperation::Rsqrt: return "Rsqrt";
79  case UnaryOperation::Neg: return "Neg";
80  case UnaryOperation::Log: return "Log";
81  case UnaryOperation::LogicalNot: return "LogicalNot";
82  case UnaryOperation::Sin: return "Sin";
83  default: return "Unknown";
84  }
85 }

◆ GetUnpaddedTensorStrides()

TensorShape GetUnpaddedTensorStrides ( const TensorInfo tensorInfo)

Definition at line 15 of file TensorHandle.cpp.

References TensorInfo::GetDataType(), GetDataTypeSize(), and TensorInfo::GetShape().

Referenced by MockTensorHandle::GetStrides(), SampleTensorHandle::GetStrides(), RefTensorHandle::GetStrides(), and ConstTensorHandle::GetStrides().

16 {
17  TensorShape shape(tensorInfo.GetShape());
18  auto size = GetDataTypeSize(tensorInfo.GetDataType());
19  auto runningSize = size;
20  std::vector<unsigned int> strides(shape.GetNumDimensions());
21  auto lastIdx = shape.GetNumDimensions()-1;
22  for (unsigned int i=0; i < lastIdx ; i++)
23  {
24  strides[lastIdx-i] = runningSize;
25  runningSize *= shape[lastIdx-i];
26  }
27  strides[0] = runningSize;
28  return TensorShape(shape.GetNumDimensions(), strides.data());
29 }
constexpr unsigned int GetDataTypeSize(DataType dataType)
Definition: TypesUtils.hpp:151
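
As a worked example: for a Float32 tensor of shape { 2, 3, 4 } the element size is 4 bytes, so the loop above produces byte strides { 48, 16, 4 }. Moving one step along the innermost dimension advances 4 bytes, one row advances 16 bytes, and one step along the outermost dimension advances 48 bytes.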

◆ GetVersion()

const std::string GetVersion ( )

Definition at line 77 of file Utils.cpp.

References ARMNN_VERSION.

78 {
79  return ARMNN_VERSION;
80 }
#define ARMNN_VERSION
ARMNN_VERSION: "X.Y.Z" where: X = Major version number Y = Minor version number Z = Patch version num...
Definition: Version.hpp:22

◆ HasCapability() [1/4]

bool HasCapability ( const std::string &  name,
const BackendCapabilities capabilities 
)

Convenience function to check if a capability exists in a BackendCapabilities struct.

Definition at line 58 of file BackendHelper.cpp.

References GetCapability().

Referenced by HasCapability(), LoadedNetwork::ImportInputs(), LoadedNetwork::ImportOutputs(), LoadedNetwork::MakeLoadedNetwork(), RuntimeImpl::RuntimeImpl(), and TEST_SUITE().

59 {
60  return GetCapability(name, capabilities).has_value();
61 }
Optional< const BackendOptions::BackendOption > GetCapability(const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...

◆ HasCapability() [2/4]

bool HasCapability ( const std::string &  name,
const armnn::BackendId backend 
)

Convenience function to check if a capability exists in a backend.

Definition at line 63 of file BackendHelper.cpp.

References GetCapability().

64 {
65  return GetCapability(name, backend).has_value();
66 }
Optional< const BackendOptions::BackendOption > GetCapability(const std::string &backendCapabilityName, const BackendCapabilities &capabilities)
Returns a BackendCapability if the backend lists the capability. The BackendCapability must then be in...

◆ HasCapability() [3/4]

bool HasCapability ( const BackendOptions::BackendOption capability,
const BackendCapabilities capabilities 
)

Convenience function to check if a given capability matches a capability in a BackendCapabilities struct.

Definition at line 68 of file BackendHelper.cpp.

References BackendOptions::Var::AsBool(), BackendOptions::Var::AsFloat(), BackendOptions::Var::AsInt(), BackendOptions::Var::AsString(), BackendOptions::Var::AsUnsignedInt(), BackendOptions::BackendOption::GetName(), BackendOptions::GetOption(), BackendOptions::GetOptionCount(), BackendOptions::BackendOption::GetValue(), BackendOptions::Var::IsBool(), BackendOptions::Var::IsFloat(), BackendOptions::Var::IsInt(), BackendOptions::Var::IsString(), and BackendOptions::Var::IsUnsignedInt().

69 {
70  for (size_t i=0; i < capabilities.GetOptionCount(); i++)
71  {
72  const auto& backendCapability = capabilities.GetOption(i);
73  if (capability.GetName() == backendCapability.GetName())
74  {
75  if (capability.GetValue().IsBool() && backendCapability.GetValue().IsBool())
76  {
77  return capability.GetValue().AsBool() == backendCapability.GetValue().AsBool();
78  }
79  else if(capability.GetValue().IsFloat() && backendCapability.GetValue().IsFloat())
80  {
81  return capability.GetValue().AsFloat() == backendCapability.GetValue().AsFloat();
82  }
83  else if(capability.GetValue().IsInt() && backendCapability.GetValue().IsInt())
84  {
85  return capability.GetValue().AsInt() == backendCapability.GetValue().AsInt();
86  }
87  else if(capability.GetValue().IsString() && backendCapability.GetValue().IsString())
88  {
89  return capability.GetValue().AsString() == backendCapability.GetValue().AsString();
90  }
91  else if(capability.GetValue().IsUnsignedInt() && backendCapability.GetValue().IsUnsignedInt())
92  {
93  return capability.GetValue().AsUnsignedInt() == backendCapability.GetValue().AsUnsignedInt();
94  }
95  }
96  }
97  return false;
98 }

◆ HasCapability() [4/4]

bool HasCapability ( const BackendOptions::BackendOption backendOption,
const armnn::BackendId backend 
)

Convenience function to check if a given capability matches a capability in a backend.

Definition at line 100 of file BackendHelper.cpp.

References BackendRegistryInstance(), and HasCapability().

101 {
102  auto const& backendRegistry = armnn::BackendRegistryInstance();
103  if (backendRegistry.IsBackendRegistered(backend))
104  {
105  auto factoryFunc = backendRegistry.GetFactory(backend);
106  auto backendObject = factoryFunc();
107  auto capabilities = backendObject->GetCapabilities();
108  return HasCapability(backendOption, capabilities);
109  }
110  return false;
111 }
bool HasCapability(const std::string &name, const BackendCapabilities &capabilities)
Convenience function to check if a capability exists in a BackendCapabilities struct.
BackendRegistry & BackendRegistryInstance()
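
A hedged sketch of the BackendOption-based overload above. The capability name "AsyncExecution" and the CpuRef backend id are illustrative values, not a statement of what any particular backend reports:

#include <armnn/BackendHelper.hpp>
#include <armnn/BackendOptions.hpp>
#include <iostream>

void CheckAsyncExecution()
{
    armnn::BackendOptions::BackendOption wanted{"AsyncExecution", true};
    if (armnn::HasCapability(wanted, armnn::BackendId("CpuRef")))
    {
        std::cout << "CpuRef reports AsyncExecution == true\n";
    }
}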

◆ IgnoreUnused()

void armnn::IgnoreUnused ( Ts &&  ...)
inline

Definition at line 14 of file IgnoreUnused.hpp.

Referenced by ChannelShuffleLayer::Accept(), ConvertFp32ToFp16Layer::Accept(), MapLayer::Accept(), MemCopyLayer::Accept(), MemImportLayer::Accept(), ConvertBf16ToFp32Layer::Accept(), ConvertFp16ToFp32Layer::Accept(), ConvertFp32ToBf16Layer::Accept(), CastLayer::Accept(), DebugLayer::Accept(), UnmapLayer::Accept(), FakeQuantizationLayer::Accept(), GatherNdLayer::Accept(), PreCompiledLayer::Accept(), ShapeLayer::Accept(), Convolution3dLayer::Accept(), UnidirectionalSequenceLstmLayer::Accept(), IInferenceTestCaseProvider::AddCommandLineOptions(), AdditionAfterMaxPoolTest(), AdditionBroadcast1ElementTestImpl(), AdditionBroadcastTestImpl(), ClBackendDefaultAllocator::allocate(), DefaultAllocator::allocate(), ArgMinMax(), BoundedReLuTestCommon(), BoundedReLuUint8UpperAndLowerBoundTest(), CalculateSlotOptionForOutput(), ITensorHandle::CanBeImported(), ClTensorHandle::CanBeImported(), CastTest(), ParserFlatbuffersSerializeFixture::CheckTensors(), ClassifierTestCase< TTestCaseDatabase, TModel >::ClassifierTestCase(), ClContextControl::ClContextControl(), ClConvolution3dWorkload::ClConvolution3dWorkload(), SpaceToBatchNdLayer::Clone(), SpaceToDepthLayer::Clone(), DynamicBackendUtils::CloseHandle(), ClUnidirectionalSequenceLstmFloatWorkloadValidate(), CompareActivationTestImpl(), CompareAdditionTest(), CompareBatchNormTest(), CompareMultiplicationTest(), CompareVector(), ConcatDifferentInputOutputQParamTest(), ConcatTestImpl(), ConcatUint16Test(), ConcatUint8DifferentQParamsTest(), ConcatUint8Test(), ConstantLinearActivationTestCommon(), ConvertBf16ToFp32Test(), ConvertFp32ToBf16Test(), Convolution2d3x3Stride2x2BFloat16SmallValueTest(), Convolution2d3x3Stride2x2BFloat16Test(), CopyTensorContentsGeneric(), MockBackend::CreateBackendProfilingContext(), RefTensorHandleFactory::CreateSubTensorHandle(), SampleDynamicTensorHandleFactory::CreateSubTensorHandle(), SampleDynamicWorkloadFactory::CreateSubTensorHandle(), RefWorkloadFactory::CreateSubTensorHandle(), RefTensorHandleFactory::CreateTensorHandle(), SampleDynamicTensorHandleFactory::CreateTensorHandle(), MockTensorHandleFactory::CreateTensorHandle(), ClWorkloadFactory::CreateTensorHandle(), ITensorHandleFactory::CreateTensorHandle(), RefWorkloadFactory::CreateTensorHandle(), MockWorkloadFactory::CreateTensorHandle(), OutputLayer::CreateTensorHandles(), InputLayer::CreateWorkload(), MemCopyLayer::CreateWorkload(), MemImportLayer::CreateWorkload(), MergeLayer::CreateWorkload(), OutputLayer::CreateWorkload(), UnmapLayer::CreateWorkload(), MapLayer::CreateWorkload(), StandInLayer::CreateWorkload(), IBackendInternal::CreateWorkloadFactory(), QASymm8Decoder::DecodeTensor(), QASymmS8Decoder::DecodeTensor(), QSymmS8Decoder::DecodeTensor(), QSymm16Decoder::DecodeTensor(), BFloat16Decoder::DecodeTensor(), Float16Decoder::DecodeTensor(), Float32Decoder::DecodeTensor(), ScaledInt32Decoder::DecodeTensor(), Int32Decoder::DecodeTensor(), Int32ToInt32tDecoder::DecodeTensor(), BooleanDecoder::DecodeTensor(), BooleanDecoderBool::DecodeTensor(), QSymm8PerAxisDecoder::DecodeTensor(), Dequantize(), SelectiveQuantizer< T, false >::Dequantize(), SelectiveQuantizer< armnn::Half, false >::Dequantize(), SelectiveQuantizer< armnn::BFloat16, false >::Dequantize(), DetectionPostProcess(), DivisionByZeroTest(), ProfilerImpl::EndEvent(), RefStridedSliceWorkload::ExecuteAsync(), SerializerStrategy::ExecuteStrategy(), TestInputLayerVisitor::ExecuteStrategy(), TestConvolution2dLayerVisitor::ExecuteStrategy(), LayerVerifierBase::ExecuteStrategy(), StrategyBase< NoThrowStrategy 
>::ExecuteStrategy(), MemCopyLayer::ExecuteStrategy(), MemImportLayer::ExecuteStrategy(), FakeQuantizationLayer::ExecuteStrategy(), PreCompiledLayer::ExecuteStrategy(), LayerVerifierBaseWithDescriptor< Descriptor >::ExecuteStrategy(), TestOutputLayerVisitor::ExecuteStrategy(), TestDepthwiseConvolution2dLayerVisitor::ExecuteStrategy(), TestFullyConnectedLayerVistor::ExecuteStrategy(), LayerVerifierBaseWithDescriptorAndConstants< Descriptor >::ExecuteStrategy(), TestBatchNormalizationLayerVisitor::ExecuteStrategy(), TestConstantLayerVisitor::ExecuteStrategy(), TestLstmLayerVisitor::ExecuteStrategy(), TestQLstmLayerVisitor::ExecuteStrategy(), TestQuantizedLstmLayerVisitor::ExecuteStrategy(), ExecutionFrame::ExecuteWorkloads(), exit_capture(), FakeQuantizationTest(), FalseFunc(), FalseFuncF16(), FalseFuncF32(), FalseFuncI32(), FalseFuncU8(), FalseInputFuncF16(), FalseInputFuncF32(), FalseOutputFuncF16(), FalseOutputFuncF32(), Gather(), ClImportTensorHandleFactory::GetCapabilities(), NeonTensorHandleFactory::GetCapabilities(), ITensorHandleFactory::GetCapabilities(), MockCounterDirectory::GetCounter(), MockCounterDirectory::GetCounterSet(), MockCounterDirectory::GetDevice(), DynamicBackendUtils::GetEntryPoint(), armnnSerializer::GetFlatBufferArgMinMaxFunction(), GetImageDataInArmNnLayoutAsNormalizedFloats(), DefaultAllocator::GetMemoryRegionAtOffset(), ClBackendDefaultAllocator::GetMemoryRegionAtOffset(), ICustomAllocator::GetMemoryRegionAtOffset(), IDeserializer::DeserializerImpl::GetNetworkInputBindingInfo(), IDeserializer::DeserializerImpl::GetNetworkOutputBindingInfo(), IDeserializer::DeserializerImpl::GetNormalizationDescriptor(), LoadedNetwork::GetOutputTensorInfo(), IDeserializer::DeserializerImpl::GetPooling2dDescriptor(), IDeserializer::DeserializerImpl::GetPooling3dDescriptor(), MockProfilingConnectionFactory::GetProfilingConnection(), DynamicBackendUtils::GetSharedObjects(), ITensorHandle::Import(), ClTensorHandle::Import(), ShapeLayer::InferOutputShapes(), SliceLayer::InferOutputShapes(), StackLayer::InferOutputShapes(), StandInLayer::InferOutputShapes(), ReshapeLayer::InferOutputShapes(), SplitterLayer::InferOutputShapes(), NeonLayerSupport::IsActivationSupported(), MockImportLayerSupport::IsAdditionSupported(), RefLayerSupport::IsArgMinMaxSupported(), RefLayerSupport::IsBatchNormalizationSupported(), RefLayerSupport::IsBatchToSpaceNdSupported(), RefLayerSupport::IsChannelShuffleSupported(), RefLayerSupport::IsComparisonSupported(), RefLayerSupport::IsConcatSupported(), NeonLayerSupport::IsConvertBf16ToFp32Supported(), NeonLayerSupport::IsConvertFp16ToFp32Supported(), NeonLayerSupport::IsConvertFp32ToBf16Supported(), NeonLayerSupport::IsConvertFp32ToFp16Supported(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDepthToSpaceSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), RefLayerSupport::IsDetectionPostProcessSupported(), RefLayerSupport::IsElementwiseUnarySupported(), RefLayerSupport::IsFakeQuantizationSupported(), ClLayerSupport::IsFillSupported(), NeonLayerSupport::IsFillSupported(), RefLayerSupport::IsFillSupported(), NeonLayerSupport::IsFloorSupported(), RefLayerSupport::IsFloorSupported(), MockImportLayerSupport::IsInputSupported(), RefLayerSupport::IsInstanceNormalizationSupported(), RefLayerSupport::IsL2NormalizationSupported(), ILayerSupport::IsLayerSupported(), ClLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogicalBinarySupported(), RefLayerSupport::IsLogSoftmaxSupported(), 
RefLayerSupport::IsLstmSupported(), RefLayerSupport::IsNormalizationSupported(), MockImportLayerSupport::IsOutputSupported(), RefLayerSupport::IsPadSupported(), RefLayerSupport::IsPermuteSupported(), RefLayerSupport::IsPooling2dSupported(), RefLayerSupport::IsPooling3dSupported(), RefLayerSupport::IsQLstmSupported(), RefLayerSupport::IsRankSupported(), RefLayerSupport::IsReduceSupported(), ClLayerSupport::IsReshapeSupported(), NeonLayerSupport::IsReshapeSupported(), RefLayerSupport::IsReshapeSupported(), RefLayerSupport::IsResizeSupported(), RefLayerSupport::IsShapeSupported(), RefLayerSupport::IsSliceSupported(), RefLayerSupport::IsSoftmaxSupported(), RefLayerSupport::IsSpaceToBatchNdSupported(), RefLayerSupport::IsSpaceToDepthSupported(), ClLayerSupport::IsSplitterSupported(), NeonLayerSupport::IsSplitterSupported(), RefLayerSupport::IsSplitterSupported(), RefLayerSupport::IsStackSupported(), RefLayerSupport::IsStridedSliceSupported(), RefLayerSupport::IsTransposeConvolution2dSupported(), RefLayerSupport::IsTransposeSupported(), RefLayerSupport::IsUnidirectionalSequenceLstmSupported(), Layer::Layer(), LogSoftmax(), ClImportTensorHandle::Map(), ClBackend::ClBackendCustomAllocatorMemoryRegion::map(), ClImportSubTensorHandle::Map(), MaximumSimpleTest(), MinimumBroadcast1ElementTest1(), MirrorPad2dTestCommon(), MirrorPad3dTestCommon(), MirrorPad4dTestCommon(), NeonConvolution3dWorkload::NeonConvolution3dWorkload(), DynamicBackendUtils::OpenHandle(), StubCommandHandler::operator()(), TestFunctorA::operator()(), TfLiteParserImpl::OutputShapeOfSqueeze(), Pad2dTestCommon(), Pad3dTestCommon(), Pad4dTestCommon(), PadQAsymmTestCommon(), PermuteInputsForConcat(), PermuteTensorData(), PreluTest(), IInferenceTestCaseProvider::ProcessCommandLineOptions(), YoloTestCase< Model >::ProcessResult(), SelectiveQuantizer< T, false >::Quantize(), SelectiveQuantizer< armnn::Half, false >::Quantize(), SelectiveQuantizer< armnn::BFloat16, false >::Quantize(), RankTest(), TestProfilingConnectionArmnnError::ReadPacket(), TestProfilingConnectionBadAckPacket::ReadPacket(), MockProfilingConnection::ReadPacket(), MockCounterDirectory::RegisterCounter(), BaseWorkload< Convolution2dQueueDescriptor >::ReplaceInputTensorHandle(), BaseWorkload< Convolution2dQueueDescriptor >::ReplaceOutputTensorHandle(), ConvertConstDequantisationLayersToConstLayersImpl::Run(), ConvertConstPermuteLayersToConstLayers::Run(), OptimizeInverseConversionsImpl::Run(), RedirectMembersToConstantInputsImpl::Run(), OptimizeInversePermutesImpl< PermuteType >::Run(), SquashEqualSiblingsImpl< Comparable >::Run(), FuseBatchNorm< ConvLayer, ArmnnType, T >::Run(), ConvertConstants< Converter, Predicate >::Run(), MockSendCounterPacket::SendCounterDirectoryPacket(), MockSendCounterPacket::SendPeriodicCounterCapturePacket(), MockSendCounterPacket::SendPeriodicCounterSelectionPacket(), SetLogFilter(), ClImportTensorHandle::SetMemoryGroup(), ClImportSubTensorHandle::SetMemoryGroup(), ShapeTest(), SimpleActivationTest(), SimpleConvertFp16ToFp32Test(), SimpleConvertFp32ToFp16Test(), SimpleConvolution2d3x3NhwcTestCommon(), SimpleConvolution2d3x3Stride2x2TestCommon(), SimpleConvolution2dNhwcTestImpl(), SimpleConvolution2dTestImpl(), SimpleFillTest(), SimpleFloorTest(), SimplePermuteTestImpl(), SimpleTransposeTestImpl(), Slice(), SqrtNNTest(), OpenClTimer::Start(), MemoryManager::StoreMemToAllocate(), Graph::SubstituteSubgraph(), TEST_SUITE(), TestDynamicBackendId(), TrueFunc(), UnidirectionalSequenceLstmInt8WithCifgWithPeepholeNoProjectionTest(), 
UnidirectionalSequenceLstmLayerInt8NoCifgWithPeepholeWithProjectionTest(), UnidirectionalSequenceLstmLayerInt8NoCifgWithPeepholeWithProjectionWithLayerNormTest(), UnidirectionalSequenceLstmLayerInt8Test(), UnidirectionalSequenceLstmLayerInt8TimeMajorTest(), UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTest(), UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTest(), UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTest(), ClBackend::ClBackendCustomAllocatorMemoryRegion::unmap(), ClBackend::UseCustomMemoryAllocator(), IBackendInternal::UseCustomMemoryAllocator(), MockProfilingServiceStatus::WaitForProfilingServiceActivation(), WorkingMemHandle::WorkingMemHandle(), TestProfilingConnectionBase::WritePacket(), Graph::LayerInGraph< InputLayer >::~LayerInGraph(), Graph::LayerInGraph< OutputLayer >::~LayerInGraph(), and ScopedProfilingEvent::~ScopedProfilingEvent().

14 {}
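
A typical use, sketched here rather than taken from the ArmNN sources: silencing unused-parameter warnings in a stub or overridden method without casting each argument to void individually:

#include <armnn/utility/IgnoreUnused.hpp>

void VisitLayer(int layerIndex, const char* layerName)
{
    // Parameters are intentionally unused in this stub.
    armnn::IgnoreUnused(layerIndex, layerName);
}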

◆ InitializeArmComputeClTensorData()

void armnn::InitializeArmComputeClTensorData ( arm_compute::CLTensor &  clTensor,
const ConstTensorHandle handle 
)
inline

Definition at line 115 of file ClWorkloadUtils.hpp.

References ARMNN_ASSERT.

117 {
118  ARMNN_ASSERT(handle);
119 
120  armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
121  switch(handle->GetTensorInfo().GetDataType())
122  {
123  case DataType::Float16:
124  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>());
125  break;
126  case DataType::Float32:
127  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
128  break;
129  case DataType::QAsymmU8:
130  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>());
131  break;
132  case DataType::QAsymmS8:
133  case DataType::QSymmS8:
134  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int8_t>());
135  break;
136  case DataType::QSymmS16:
137  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int16_t>());
138  break;
139  case DataType::Signed32:
140  CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>());
141  break;
142  default:
143  ARMNN_ASSERT_MSG(false, "Unexpected tensor type.");
144  }
145 };
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void CopyArmComputeClTensorData(arm_compute::CLTensor &dstTensor, const T *srcData)
half_float::half Half
Definition: Half.hpp:18

◆ InitializeArmComputeTensorData()

void armnn::InitializeArmComputeTensorData ( arm_compute::Tensor &  tensor,
const ConstTensorHandle handle 
)
inline

Definition at line 60 of file NeonWorkloadUtils.hpp.

References ARMNN_ASSERT, ARMNN_ASSERT_MSG, CopyArmComputeTensorData(), Float16, Float32, ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), ConstTensorHandle::GetTensorInfo(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

62 {
63  ARMNN_ASSERT(handle);
64 
65  switch(handle->GetTensorInfo().GetDataType())
66  {
67  case DataType::Float16:
68  CopyArmComputeTensorData(tensor, handle->GetConstTensor<armnn::Half>());
69  break;
70  case DataType::Float32:
71  CopyArmComputeTensorData(tensor, handle->GetConstTensor<float>());
72  break;
73  case DataType::QAsymmU8:
74  CopyArmComputeTensorData(tensor, handle->GetConstTensor<uint8_t>());
75  break;
76  case DataType::QSymmS8:
77  case DataType::QAsymmS8:
78  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int8_t>());
79  break;
80  case DataType::Signed32:
81  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int32_t>());
82  break;
83  case DataType::QSymmS16:
84  CopyArmComputeTensorData(tensor, handle->GetConstTensor<int16_t>());
85  break;
86  default:
87  ARMNN_ASSERT_MSG(false, "Unexpected tensor type.");
88  }
89 };
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
void CopyArmComputeTensorData(arm_compute::Tensor &dstTensor, const T *srcData)
half_float::half Half
Definition: Half.hpp:18

◆ InsertConvertBf16ToFp32LayersBefore()

std::vector< ConvertBf16ToFp32Layer * > InsertConvertBf16ToFp32LayersBefore ( Graph graph,
Layer layer,
bool  expectCorrectInputType 
)

Definition at line 51 of file NetworkUtils.cpp.

References Layer::BeginInputSlots(), BFloat16, Layer::EndInputSlots(), Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment().

54 {
55  std::vector<ConvertBf16ToFp32Layer*> convertLayers;
56  convertLayers.reserve(layer.GetNumInputSlots());
57 
58  // Insert a ConvertBf16ToFp32Layer before each input slot
59  for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
60  {
61  bool allowInsert = true;
62  if (expectCorrectInputType)
63  {
64  // Only insert ConvertBf16ToFp32Layer before BF16 input slots
65  OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
66  allowInsert =
67  connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
68  }
69 
70  if (allowInsert)
71  {
72  const std::string name =
73  std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
74  layer.GetName();
75  ConvertBf16ToFp32Layer* convertLayer =
76  graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
77 
78  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
79  convertInfo.SetDataType(DataType::Float32);
80 
81  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
82 
83  convertLayers.emplace_back(convertLayer);
84  }
85  }
86 
87  return convertLayers;
88 }

◆ InsertConvertFp16ToFp32LayersBefore()

std::vector< ConvertFp16ToFp32Layer * > InsertConvertFp16ToFp32LayersBefore ( Graph graph,
Layer layer,
bool  expectCorrectInputType 
)

Definition at line 138 of file NetworkUtils.cpp.

References Layer::BeginInputSlots(), Layer::EndInputSlots(), Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), ConvertFp32NetworkToFp16Impl::Run(), and TEST_SUITE().

141 {
142  std::vector<ConvertFp16ToFp32Layer*> convertLayers;
143  convertLayers.reserve(layer.GetNumInputSlots());
144 
145  // Insert a ConvertFp16ToFp32Layer before each input slot
146  for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
147  {
148  bool allowInsert = true;
149  if (expectCorrectInputType)
150  {
151  // Only insert ConvertFp16ToFp32Layer before FP16 input slots
152  OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
153  allowInsert =
154  connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
155  }
156 
157  if (allowInsert)
158  {
159  const std::string name =
160  std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
161  layer.GetName();
162  ConvertFp16ToFp32Layer* convertLayer =
163  graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
164 
165  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
166  convertInfo.SetDataType(DataType::Float32);
167 
168  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
169 
170  convertLayers.emplace_back(convertLayer);
171  }
172  }
173 
174  return convertLayers;
175 }

◆ InsertConvertFp32ToBf16LayersAfter()

std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersAfter ( Graph graph,
Layer layer 
)

Definition at line 177 of file NetworkUtils.cpp.

References BFloat16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment().

178 {
179  const unsigned int numOutputSlots = layer.GetNumOutputSlots();
180 
181  std::vector<ConvertFp32ToBf16Layer*> convertLayers;
182  convertLayers.reserve(numOutputSlots);
183 
184  // Update Bf16 output slots to FP32 on current layer
185  ChangeOutputBf16ToFp32(layer);
186 
187  // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
188  for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
189  {
190  OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
191  if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
192  {
193  const std::string name =
194  std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
195  ConvertFp32ToBf16Layer* convertLayer =
196  graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
197 
198  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
199  convertInfo.SetDataType(DataType::BFloat16);
200 
201  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
202 
203  convertLayers.emplace_back(convertLayer);
204  }
205  }
206 
207  return convertLayers;
208 }

◆ InsertConvertFp32ToBf16LayersBefore()

std::vector< ConvertFp32ToBf16Layer * > InsertConvertFp32ToBf16LayersBefore ( Graph graph,
Layer layer,
bool  expectCorrectInputType 
)

Definition at line 90 of file NetworkUtils.cpp.

References Layer::BeginInputSlots(), BFloat16, Convolution2d, DepthwiseConvolution2d, Layer::EndInputSlots(), Float32, FullyConnected, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumInputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Layer::GetType(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by ConvertFp32NetworkToBf16Impl::Run().

93 {
94  std::vector<ConvertFp32ToBf16Layer*> convertLayers;
95  convertLayers.reserve(layer.GetNumInputSlots());
96 
97  // Insert a ConvertFp32ToBf16Layer before each input slot
98  for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
99  {
100  bool allowInsert = true;
101 
102  if ((layer.GetType() == LayerType::Convolution2d ||
103  layer.GetType() == LayerType::FullyConnected ||
104  layer.GetType() == LayerType::DepthwiseConvolution2d)
105  && inputSlot->GetSlotIndex() == 2)
106  {
107  // Refrain from reducing bias to Bf16
108  continue;
109  }
110  if (expectCorrectInputType)
111  {
112  // Only insert ConvertFp32ToBf16Layer before FP32 input slots
113  OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
114  allowInsert =
115  connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
116  }
117 
118  if (allowInsert)
119  {
120  const std::string name =
121  std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
122  layer.GetName();
123  ConvertFp32ToBf16Layer* convertLayer =
124  graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());
125 
126  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
127  convertInfo.SetDataType(DataType::BFloat16);
128 
129  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
130 
131  convertLayers.emplace_back(convertLayer);
132  }
133  }
134 
135  return convertLayers;
136 }
void FullyConnected(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rWeightsShape, Decoder< float > &rWeightDecoder, Decoder< float > *pBiasDecoder, const bool biasEnabled, const unsigned int K, const bool transposeWeights)
Performs a matrix multiplication and optionally adds a bias.

◆ InsertConvertFp32ToFp16LayersAfter()

std::vector< ConvertFp32ToFp16Layer * > InsertConvertFp32ToFp16LayersAfter ( Graph graph,
Layer layer 
)

Definition at line 210 of file NetworkUtils.cpp.

References Float16, Float32, InputSlot::GetConnectedOutputSlot(), TensorInfo::GetDataType(), Layer::GetInputSlot(), Layer::GetName(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), TensorInfo::SetDataType(), and OutputSlot::SetTensorInfo().

Referenced by AttemptBackendAssignment(), ConvertFp32NetworkToFp16Impl::Run(), and TEST_SUITE().

211 {
212  const unsigned int numOutputSlots = layer.GetNumOutputSlots();
213 
214  std::vector<ConvertFp32ToFp16Layer*> convertLayers;
215  convertLayers.reserve(numOutputSlots);
216 
217  // Update FP16 output slots to FP32 on current layer
218  ChangeOutputFp16ToFp32(layer);
219 
220  // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
221  for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
222  {
223  OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
224  if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
225  {
226  const std::string name =
227  std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
228  ConvertFp32ToFp16Layer* convertLayer =
229  graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
230 
231  TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
232  convertInfo.SetDataType(DataType::Float16);
233 
234  convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
235 
236  convertLayers.emplace_back(convertLayer);
237  }
238  }
239 
240  return convertLayers;
241 }
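
A hedged sketch of the pattern the FP32-to-FP16 conversion pass uses with the two helpers above: a layer that must keep running in FP32 is wrapped with conversion layers on both sides. Graph, Layer and both helpers live in internal src/armnn headers, so the include is indicative:

#include "NetworkUtils.hpp" // internal header declaring the Insert* helpers

void WrapFp32OnlyLayer(armnn::Graph& graph, armnn::Layer& layer)
{
    // Inserts a ConvertFp16ToFp32Layer in front of each FP16-typed input slot
    // (expectCorrectInputType == true skips slots that are already FP32).
    armnn::InsertConvertFp16ToFp32LayersBefore(graph, layer, true);

    // Switches the layer's FP16 outputs to FP32 and appends a
    // ConvertFp32ToFp16Layer after each of them; both calls return the
    // conversion layers they created.
    armnn::InsertConvertFp32ToFp16LayersAfter(graph, layer);
}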

◆ InsertDebugLayerAfter()

std::vector< DebugLayer * > InsertDebugLayerAfter ( Graph graph,
Layer layer 
)

Definition at line 243 of file NetworkUtils.cpp.

References ARMNN_ASSERT, Layer::BeginOutputSlots(), CpuRef, Layer::EndOutputSlots(), InputSlot::GetConnectedOutputSlot(), Layer::GetInputSlot(), Layer::GetNameStr(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), OutputSlot::GetTensorInfo(), Graph::InsertNewLayer(), Layer::SetBackendId(), and OutputSlot::SetTensorInfo().

Referenced by AddDebugImpl::Run().

244 {
245  std::vector<DebugLayer*> debugLayers;
246  debugLayers.reserve(layer.GetNumOutputSlots());
247 
248  // Connect a DebugLayer to each output slot of the layer
249  uint32_t outputSlotIdx = 0;
250  for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
251  {
252  const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr() + "_" +
253  std::to_string(outputSlotIdx);
254 
255  DebugLayer* debugLayer =
256  graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str());
257 
258  // Sets output tensor info for the debug layer.
259  ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
260  TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
261 
262  debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);
263 
264  // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
265  debugLayer->SetBackendId(Compute::CpuRef);
266 
267  debugLayers.emplace_back(debugLayer);
268 
269  ++outputSlotIdx;
270  }
271 
272  return debugLayers;
273 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ InstanceNorm()

void InstanceNorm ( const InstanceNormalizationQueueDescriptor data,
const TensorInfo inputInfo,
Decoder< float > &  inputDecoder,
Encoder< float > &  outputEncoder 
)

Definition at line 18 of file InstanceNorm.cpp.

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), InstanceNormalizationDescriptor::m_Beta, InstanceNormalizationDescriptor::m_DataLayout, InstanceNormalizationDescriptor::m_Eps, InstanceNormalizationDescriptor::m_Gamma, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

Referenced by RefInstanceNormalizationWorkload::ExecuteAsync().

22 {
23  const TensorShape inputShape = inputInfo.GetShape();
24 
25  armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
26 
27  unsigned int inputBatches = inputShape[0];
28  unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
29  unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
30  unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
31 
32  float beta = data.m_Parameters.m_Beta;
33  float eps = data.m_Parameters.m_Eps;
34  float gamma = data.m_Parameters.m_Gamma;
35 
36  for (unsigned int n = 0; n < inputBatches; ++n)
37  {
38  for (unsigned int c = 0; c < inputChannels; ++c)
39  {
40  float mean = 0, var = 0;
41 
42  //Calculate Mean
43  for (unsigned int h = 0; h < inputHeight; h++)
44  {
45  for (unsigned int w = 0; w < inputWidth; w++)
46  {
47  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
48 
49  inputDecoder[index];
50  float value = inputDecoder.Get();
51  mean += value;
52  }
53  }
54  mean /= static_cast<float>(inputHeight * inputWidth);
55 
56  //Calculate Variance
57  for (unsigned int h = 0; h < inputHeight; h++)
58  {
59  for (unsigned int w = 0; w < inputWidth; w++)
60  {
61  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
62 
63  inputDecoder[index];
64  float value = inputDecoder.Get();
65  var += (value - mean) * (value - mean);
66  }
67  }
68  var /= static_cast<float>(inputHeight * inputWidth);
69 
70  // Apply Instance Normalisation
71  for (unsigned int h = 0; h < inputHeight; ++h)
72  {
73  for (unsigned int w = 0; w < inputWidth; ++w)
74  {
75  unsigned int index = dataLayout.GetIndex(inputShape, n, c, h, w);
76  inputDecoder[index];
77  outputEncoder[index];
78  outputEncoder.Set((inputDecoder.Get() - mean) * gamma / std::sqrt ( var + eps) + beta);
79  }
80 
81  }
82  }
83  }
84 }
virtual void Set(IType right)=0
virtual IType Get() const =0
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
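
Restating the loops above as a single formula, where the mean and variance are taken per batch n and channel c over the H x W plane, and gamma, beta and epsilon come from the descriptor's m_Gamma, m_Beta and m_Eps:

    y_{n,c,h,w} = \gamma \cdot \frac{x_{n,c,h,w} - \mu_{n,c}}{\sqrt{\sigma^2_{n,c} + \epsilon}} + \beta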

◆ IntersectionOverUnion()

float IntersectionOverUnion ( const float *  boxI,
const float *  boxJ 
)

Definition at line 30 of file DetectionPostProcess.cpp.

Referenced by NonMaxSuppression(), and TEST_SUITE().

31 {
32  // Box-corner format: ymin, xmin, ymax, xmax.
33  const int yMin = 0;
34  const int xMin = 1;
35  const int yMax = 2;
36  const int xMax = 3;
37  float areaI = (boxI[yMax] - boxI[yMin]) * (boxI[xMax] - boxI[xMin]);
38  float areaJ = (boxJ[yMax] - boxJ[yMin]) * (boxJ[xMax] - boxJ[xMin]);
39  float yMinIntersection = std::max(boxI[yMin], boxJ[yMin]);
40  float xMinIntersection = std::max(boxI[xMin], boxJ[xMin]);
41  float yMaxIntersection = std::min(boxI[yMax], boxJ[yMax]);
42  float xMaxIntersection = std::min(boxI[xMax], boxJ[xMax]);
43  float areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
44  std::max(xMaxIntersection - xMinIntersection, 0.0f);
45  float areaUnion = areaI + areaJ - areaIntersection;
46  return areaIntersection / areaUnion;
47 }
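
As a worked example in the box-corner format above: boxI = { 0, 0, 2, 2 } and boxJ = { 1, 1, 3, 3 } each have area 4, their intersection is the unit square { 1, 1, 2, 2 } with area 1, so the union is 4 + 4 - 1 = 7 and the returned IoU is 1/7, roughly 0.143.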

◆ IsActivationSupported()

bool armnn::IsActivationSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const ActivationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and ILayerSupport::~ILayerSupport().

◆ IsAdditionSupported()

bool armnn::IsAdditionSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and MockLayerSupport::IsLayerSupported().

◆ IsBatchNormalizationSupported()

bool armnn::IsBatchNormalizationSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const TensorInfo mean,
const TensorInfo var,
const TensorInfo beta,
const TensorInfo gamma,
const BatchNormalizationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsBatchToSpaceNdSupported()

bool armnn::IsBatchToSpaceNdSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const BatchToSpaceNdDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsBFloat16()

bool armnn::IsBFloat16 ( const WorkloadInfo info)

Definition at line 53 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

54 {
55  return IsDataType<DataType::BFloat16>(info);
56 }

◆ IsCapabilitySupported()

bool IsCapabilitySupported ( const armnn::BackendId backend,
armnn::BackendCapability  capability 
)

Convenience function to check a capability on a backend.

Definition at line 114 of file BackendHelper.cpp.

References ARMNN_NO_DEPRECATE_WARN_BEGIN, ARMNN_NO_DEPRECATE_WARN_END, and BackendRegistryInstance().

115 {
116  bool hasCapability = false;
117  auto const& backendRegistry = armnn::BackendRegistryInstance();
118  if (backendRegistry.IsBackendRegistered(backend))
119  {
120  auto factoryFunc = backendRegistry.GetFactory(backend);
121  auto backendObject = factoryFunc();
123  hasCapability = backendObject->HasCapability(capability);
125  }
126  return hasCapability;
127 }
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
BackendRegistry & BackendRegistryInstance()
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34

◆ IsConcatSupported()

bool armnn::IsConcatSupported ( const BackendId backend,
const std::vector< const TensorInfo *>  inputs,
const TensorInfo output,
const OriginsDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsConstantSupported()

bool armnn::IsConstantSupported ( const BackendId backend,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsConvertFp16ToFp32Supported()

bool armnn::IsConvertFp16ToFp32Supported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsConvertFp32ToFp16Supported()

bool armnn::IsConvertFp32ToFp16Supported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsConvolution2dSupported()

bool armnn::IsConvolution2dSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const Convolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and MockLayerSupport::IsLayerSupported().

◆ IsDataType()

bool armnn::IsDataType ( const WorkloadInfo info)

Definition at line 32 of file RefWorkloadFactory.cpp.

References WorkloadInfo::m_InputTensorInfos, and WorkloadInfo::m_OutputTensorInfos.

33 {
34  auto checkType = [](const TensorInfo& tensorInfo) {return tensorInfo.GetDataType() == ArmnnType;};
35  auto it = std::find_if(std::begin(info.m_InputTensorInfos), std::end(info.m_InputTensorInfos), checkType);
36  if (it != std::end(info.m_InputTensorInfos))
37  {
38  return true;
39  }
40  it = std::find_if(std::begin(info.m_OutputTensorInfos), std::end(info.m_OutputTensorInfos), checkType);
41  if (it != std::end(info.m_OutputTensorInfos))
42  {
43  return true;
44  }
45  return false;
46 }
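
Note that IsDataType is a function template over a DataType non-type template parameter (the parameter is not visible in the flattened signature above); the IsBFloat16 and IsFloat16 helpers documented nearby are thin wrappers that simply instantiate it, for example as IsDataType<DataType::BFloat16>(info).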

◆ IsDebugSupported()

bool armnn::IsDebugSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsDepthwiseConvolutionSupported()

bool armnn::IsDepthwiseConvolutionSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const DepthwiseConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsDequantizeSupported()

bool armnn::IsDequantizeSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsDivisionSupported()

bool armnn::IsDivisionSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsEqualSupported()

bool armnn::IsEqualSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

◆ IsFakeQuantizationSupported()

bool armnn::IsFakeQuantizationSupported ( const BackendId backend,
const TensorInfo input,
const FakeQuantizationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsFloat16()

bool armnn::IsFloat16 ( const WorkloadInfo info)

Definition at line 58 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

59 {
60  return IsDataType<DataType::Float16>(info);
61 }

◆ IsFloorSupported()

bool armnn::IsFloorSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsFullyConnectedSupported()

bool armnn::IsFullyConnectedSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const TensorInfo weights,
const TensorInfo biases,
const FullyConnectedDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsGreaterSupported()

bool armnn::IsGreaterSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

◆ IsInputSupported()

bool armnn::IsInputSupported ( const BackendId backend,
const TensorInfo input,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and MockLayerSupport::IsLayerSupported().

◆ IsL2NormalizationSupported()

bool armnn::IsL2NormalizationSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const L2NormalizationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsLayerOptimizable() [1/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer *  layer)

Definition at line 85 of file MockBackend.cpp.

References ARMNN_ASSERT, and Layer::GetName().

Referenced by IsLayerOptimizable(), and MockBackend::OptimizeSubgraphView().

86 {
87  ARMNN_ASSERT(layer != nullptr);
88 
89  // A Layer is not optimizable if its name contains "unoptimizable"
90  const std::string layerName(layer->GetName());
91  bool optimizable = layerName.find("unoptimizable") == std::string::npos;
92 
93  return optimizable;
94 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:317

◆ IsLayerOptimizable() [2/2]

bool armnn::IsLayerOptimizable ( const armnn::Layer &  layer)

Definition at line 96 of file MockBackend.cpp.

References IsLayerOptimizable().

97 {
98  return IsLayerOptimizable(&layer);
99 }
bool IsLayerOptimizable(const armnn::Layer &layer)
Definition: MockBackend.cpp:96

◆ IsLayerSupported() [1/2]

bool armnn::IsLayerSupported ( const armnn::Layer *  layer)

Definition at line 60 of file MockBackend.cpp.

References Addition, ARMNN_ASSERT, Constant, Convolution2d, Layer::GetType(), Input, and Output.

Referenced by SampleDynamicWorkloadFactory::IsLayerSupported().

61 {
62  ARMNN_ASSERT(layer != nullptr);
63 
64  armnn::LayerType layerType = layer->GetType();
65  switch (layerType)
66  {
67  case armnn::LayerType::Input:
68  case armnn::LayerType::Output:
69  case armnn::LayerType::Addition:
70  case armnn::LayerType::Constant:
71  case armnn::LayerType::Convolution2d:
72  // Layer supported
73  return true;
74  default:
75  // Layer unsupported
76  return false;
77  }
78 }
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:271
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467

◆ IsLayerSupported() [2/2]

bool armnn::IsLayerSupported ( const armnn::Layer &  layer)

Definition at line 80 of file MockBackend.cpp.

References IWorkloadFactory::IsLayerSupported().

81 {
82  return IsLayerSupported(&layer);
83 }
bool IsLayerSupported(const armnn::Layer &layer)
Definition: MockBackend.cpp:80

◆ IsLstmSupported()

bool armnn::IsLstmSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo scratchBuffer,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const LstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMaximumSupported()

bool armnn::IsMaximumSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnSupported = nullptr,
size_t  reasonIfUnSupportedMaxLength = 0 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMeanSupported()

bool armnn::IsMeanSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const MeanDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMemCopySupported()

bool armnn::IsMemCopySupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMergeSupported()

bool armnn::IsMergeSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMinimumSupported()

bool armnn::IsMinimumSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsMultiplicationSupported()

bool armnn::IsMultiplicationSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsNormalizationSupported()

bool armnn::IsNormalizationSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const NormalizationDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsOperationQueueDescriptor() [1/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const QueueDescriptorType &  )

Definition at line 18 of file RefWorkloadFactory.hpp.

18 { return true; }

◆ IsOperationQueueDescriptor() [2/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const MemCopyQueueDescriptor &  )

Definition at line 21 of file RefWorkloadFactory.hpp.

21 { return false; }

◆ IsOperationQueueDescriptor() [3/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const ConstantQueueDescriptor &  )

Definition at line 24 of file RefWorkloadFactory.hpp.

24 { return false; }

◆ IsOperationQueueDescriptor() [4/4]

constexpr bool armnn::IsOperationQueueDescriptor ( const PermuteQueueDescriptor &  )

Definition at line 27 of file RefWorkloadFactory.hpp.

27 { return false; }

◆ IsOutputSupported()

bool armnn::IsOutputSupported ( const BackendId backend,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported(), and MockLayerSupport::IsLayerSupported().

◆ IsPadSupported()

bool armnn::IsPadSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const PadDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsPermuteSupported()

bool armnn::IsPermuteSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const PermuteDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsPooling2dSupported()

bool armnn::IsPooling2dSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const Pooling2dDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsPreCompiledSupported()

bool armnn::IsPreCompiledSupported ( const BackendId backend,
const TensorInfo input,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsPreluSupported()

bool armnn::IsPreluSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo alpha,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsQAsymmS8()

bool armnn::IsQAsymmS8 ( const WorkloadInfo info)

Definition at line 73 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

74 {
75  return IsDataType<DataType::QAsymmS8>(info);
76 }

◆ IsQAsymmU8()

bool armnn::IsQAsymmU8 ( const WorkloadInfo info)

Definition at line 78 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

79 {
80  return IsDataType<DataType::QAsymmU8>(info);
81 }

◆ IsQSymmS16()

bool armnn::IsQSymmS16 ( const WorkloadInfo info)

Definition at line 63 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

64 {
65  return IsDataType<DataType::QSymmS16>(info);
66 }

◆ IsQSymmS8()

bool armnn::IsQSymmS8 ( const WorkloadInfo info)

Definition at line 68 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

69 {
70  return IsDataType<DataType::QSymmS8>(info);
71 }

◆ IsQuantized8BitType()

constexpr bool armnn::IsQuantized8BitType ( DataType  dataType)

Definition at line 285 of file TypesUtils.hpp.

References QAsymmS8, QAsymmU8, and QSymmS8.

Referenced by GetBiasDataType(), RefLayerSupport::IsConvolution2dSupported(), RefLayerSupport::IsConvolution3dSupported(), RefLayerSupport::IsDepthwiseConvolutionSupported(), IsQuantizedType(), and RefLayerSupport::IsTransposeConvolution2dSupported().

286 {
287  return dataType == DataType::QAsymmU8 ||
288  dataType == DataType::QAsymmS8 ||
289  dataType == DataType::QSymmS8;
290 }
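
Because the function is constexpr, the classification can be checked at compile time; for example:

static_assert(armnn::IsQuantized8BitType(armnn::DataType::QAsymmU8), "8-bit quantized type");
static_assert(!armnn::IsQuantized8BitType(armnn::DataType::QSymmS16), "16-bit, not 8-bit");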

◆ IsQuantizedLstmSupported()

bool armnn::IsQuantizedLstmSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo previousCellStateIn,
const TensorInfo previousOutputIn,
const TensorInfo cellStateOut,
const TensorInfo output,
const QuantizedLstmInputParamsInfo paramsInfo,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsQuantizedType() [1/2]

constexpr bool armnn::IsQuantizedType ( )

◆ IsQuantizedType() [2/2]

constexpr bool armnn::IsQuantizedType ( DataType  dataType)

Definition at line 292 of file TypesUtils.hpp.

References IsQuantized8BitType(), and QSymmS16.

293 {
294  return dataType == DataType::QSymmS16 || IsQuantized8BitType(dataType);
295 }
constexpr bool IsQuantized8BitType(DataType dataType)
Definition: TypesUtils.hpp:285
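
QSymmS16 is the only addition over the 8-bit check above; for example:

static_assert(armnn::IsQuantizedType(armnn::DataType::QSymmS16), "quantized type");
static_assert(!armnn::IsQuantizedType(armnn::DataType::Float32), "not a quantized type");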

◆ IsReadyForSplitAssignment()

bool armnn::IsReadyForSplitAssignment ( LayerSelectionInfo::LayerInfoContainer &  layerInfos,
LayerSelectionInfo &  layerInfo 
)

Definition at line 374 of file SubgraphViewSelector.cpp.

References ForEachLayerInput().

Referenced by SubgraphViewSelector::SelectSubgraphs().

375 {
376  bool ready = true;
377  ForEachLayerInput(layerInfos, layerInfo,
378  [&ready](LayerSelectionInfo& parentInfo)
379  {
380  if (!parentInfo.m_IsProcessed)
381  {
382  ready = false;
383  }
384  });
385  return ready;
386 }
void ForEachLayerInput(LayerSelectionInfo::LayerInfoContainer &layerInfos, LayerSelectionInfo &layerInfo, Delegate function)

◆ IsReduceSupported()

bool armnn::IsReduceSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const ReduceDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsReshapeSupported()

bool armnn::IsReshapeSupported ( const BackendId backend,
const TensorInfo input,
const ReshapeDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsResizeSupported()

bool armnn::IsResizeSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const ResizeDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsRsqrtSupported()

bool armnn::IsRsqrtSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

◆ IsSigned32()

bool armnn::IsSigned32 ( const WorkloadInfo info)

Definition at line 48 of file RefWorkloadFactory.cpp.

References info.

Referenced by RefWorkloadFactory::CreateWorkload().

49 {
50  return IsDataType<DataType::Signed32>(info);
51 }

◆ IsSoftmaxSupported()

bool armnn::IsSoftmaxSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const SoftmaxDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSpaceToBatchNdSupported()

bool armnn::IsSpaceToBatchNdSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const SpaceToBatchNdDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSpaceToDepthSupported()

bool armnn::IsSpaceToDepthSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const SpaceToDepthDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSplitterSupported()

bool armnn::IsSplitterSupported ( const BackendId backend,
const TensorInfo input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
const ViewsDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsStackSupported()

bool armnn::IsStackSupported ( const BackendId backend,
const std::vector< const TensorInfo *>  inputs,
const TensorInfo output,
const StackDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsStridedSliceSupported()

bool armnn::IsStridedSliceSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const StridedSliceDescriptor descriptor,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSubtractionSupported()

bool armnn::IsSubtractionSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsSupportedForDataTypeGeneric()

bool armnn::IsSupportedForDataTypeGeneric ( Optional< std::string &>  reasonIfUnsupported,
DataType  dataType,
Float16Func  float16FuncPtr,
Float32Func  float32FuncPtr,
Uint8Func  uint8FuncPtr,
Int32Func  int32FuncPtr,
BooleanFunc  booleanFuncPtr,
Params &&...  params 
)

Definition at line 27 of file LayerSupportCommon.hpp.

References Boolean, Float16, Float32, QAsymmU8, and Signed32.

Referenced by RefLayerSupport::IsConvertFp16ToFp32Supported(), RefLayerSupport::IsConvertFp32ToFp16Supported(), and NeonLayerSupport::IsFloorSupported().

35 {
36  switch(dataType)
37  {
38  case DataType::Float16:
39  return float16FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
40  case DataType::Float32:
41  return float32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
42  case DataType::QAsymmU8:
43  return uint8FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
44  case DataType::Signed32:
45  return int32FuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
46  case DataType::Boolean:
47  return booleanFuncPtr(reasonIfUnsupported, std::forward<Params>(params)...);
48  default:
49  return false;
50  }
51 }
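
A hedged sketch of how a layer-support check might dispatch through this helper, using the TrueFunc/FalseFunc helpers declared in the same header; the per-type answers chosen here are illustrative and not taken from any real backend, and reasonIfUnsupported and input are assumed to come from the enclosing IsXxxSupported implementation.

// Illustrative dispatch: accept float and QAsymmU8 tensors, reject the rest.
bool supported = armnn::IsSupportedForDataTypeGeneric(reasonIfUnsupported,
                                                      input.GetDataType(),
                                                      &armnn::TrueFunc<>,   // Float16
                                                      &armnn::TrueFunc<>,   // Float32
                                                      &armnn::TrueFunc<>,   // QAsymmU8
                                                      &armnn::FalseFunc<>,  // Signed32
                                                      &armnn::FalseFunc<>); // Boolean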

◆ IsSwitchSupported()

bool armnn::IsSwitchSupported ( const BackendId backend,
const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output0,
const TensorInfo output1,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ IsTransposeConvolution2dSupported()

bool armnn::IsTransposeConvolution2dSupported ( const BackendId backend,
const TensorInfo input,
const TensorInfo output,
const TransposeConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
char *  reasonIfUnsupported = nullptr,
size_t  reasonIfUnsupportedMaxLength = 1024 
)

Deprecated in favor of IBackend and ILayerSupport interfaces.

Referenced by ILayerSupport::IsLayerSupported().

◆ LayerEnumOf() [1/74]

constexpr LayerType armnn::LayerEnumOf ( const T *  = nullptr)

◆ LayerEnumOf() [2/74]

constexpr LayerType armnn::LayerEnumOf ( const ActivationLayer )

Definition at line 110 of file LayersFwd.hpp.

◆ LayerEnumOf() [3/74]

constexpr LayerType armnn::LayerEnumOf ( const AdditionLayer )

Definition at line 111 of file LayersFwd.hpp.

◆ LayerEnumOf() [4/74]

constexpr LayerType armnn::LayerEnumOf ( const ArgMinMaxLayer )

Definition at line 112 of file LayersFwd.hpp.

◆ LayerEnumOf() [5/74]

constexpr LayerType armnn::LayerEnumOf ( const BatchNormalizationLayer )

Definition at line 113 of file LayersFwd.hpp.

◆ LayerEnumOf() [6/74]

constexpr LayerType armnn::LayerEnumOf ( const BatchToSpaceNdLayer )

Definition at line 114 of file LayersFwd.hpp.

◆ LayerEnumOf() [7/74]

constexpr LayerType armnn::LayerEnumOf ( const CastLayer )

Definition at line 115 of file LayersFwd.hpp.

◆ LayerEnumOf() [8/74]

constexpr LayerType armnn::LayerEnumOf ( const ChannelShuffleLayer )

Definition at line 116 of file LayersFwd.hpp.

◆ LayerEnumOf() [9/74]

constexpr LayerType armnn::LayerEnumOf ( const ComparisonLayer )

Definition at line 117 of file LayersFwd.hpp.

◆ LayerEnumOf() [10/74]

constexpr LayerType armnn::LayerEnumOf ( const ConcatLayer )

Definition at line 118 of file LayersFwd.hpp.

◆ LayerEnumOf() [11/74]

constexpr LayerType armnn::LayerEnumOf ( const ConstantLayer )

Definition at line 119 of file LayersFwd.hpp.

◆ LayerEnumOf() [12/74]

constexpr LayerType armnn::LayerEnumOf ( const ConvertBf16ToFp32Layer )

Definition at line 120 of file LayersFwd.hpp.

◆ LayerEnumOf() [13/74]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp16ToFp32Layer )

Definition at line 121 of file LayersFwd.hpp.

◆ LayerEnumOf() [14/74]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp32ToBf16Layer )

Definition at line 122 of file LayersFwd.hpp.

◆ LayerEnumOf() [15/74]

constexpr LayerType armnn::LayerEnumOf ( const ConvertFp32ToFp16Layer )

Definition at line 123 of file LayersFwd.hpp.

◆ LayerEnumOf() [16/74]

constexpr LayerType armnn::LayerEnumOf ( const Convolution2dLayer )

Definition at line 124 of file LayersFwd.hpp.

◆ LayerEnumOf() [17/74]

constexpr LayerType armnn::LayerEnumOf ( const Convolution3dLayer )

Definition at line 125 of file LayersFwd.hpp.

◆ LayerEnumOf() [18/74]

constexpr LayerType armnn::LayerEnumOf ( const DebugLayer )

Definition at line 126 of file LayersFwd.hpp.

◆ LayerEnumOf() [19/74]

constexpr LayerType armnn::LayerEnumOf ( const DepthToSpaceLayer )

Definition at line 127 of file LayersFwd.hpp.

◆ LayerEnumOf() [20/74]

constexpr LayerType armnn::LayerEnumOf ( const DepthwiseConvolution2dLayer )

Definition at line 128 of file LayersFwd.hpp.

◆ LayerEnumOf() [21/74]

constexpr LayerType armnn::LayerEnumOf ( const DequantizeLayer )

Definition at line 129 of file LayersFwd.hpp.

◆ LayerEnumOf() [22/74]

constexpr LayerType armnn::LayerEnumOf ( const DetectionPostProcessLayer )

Definition at line 130 of file LayersFwd.hpp.

◆ LayerEnumOf() [23/74]

constexpr LayerType armnn::LayerEnumOf ( const DivisionLayer )

Definition at line 131 of file LayersFwd.hpp.

◆ LayerEnumOf() [24/74]

constexpr LayerType armnn::LayerEnumOf ( const ElementwiseUnaryLayer )

Definition at line 132 of file LayersFwd.hpp.

◆ LayerEnumOf() [25/74]

constexpr LayerType armnn::LayerEnumOf ( const FakeQuantizationLayer )

Definition at line 133 of file LayersFwd.hpp.

◆ LayerEnumOf() [26/74]

constexpr LayerType armnn::LayerEnumOf ( const FillLayer )

Definition at line 134 of file LayersFwd.hpp.

◆ LayerEnumOf() [27/74]

constexpr LayerType armnn::LayerEnumOf ( const FloorLayer )

Definition at line 135 of file LayersFwd.hpp.

◆ LayerEnumOf() [28/74]

constexpr LayerType armnn::LayerEnumOf ( const FullyConnectedLayer )

Definition at line 136 of file LayersFwd.hpp.

◆ LayerEnumOf() [29/74]

constexpr LayerType armnn::LayerEnumOf ( const GatherLayer )

Definition at line 137 of file LayersFwd.hpp.

◆ LayerEnumOf() [30/74]

constexpr LayerType armnn::LayerEnumOf ( const GatherNdLayer )

Definition at line 138 of file LayersFwd.hpp.

◆ LayerEnumOf() [31/74]

constexpr LayerType armnn::LayerEnumOf ( const InputLayer )

Definition at line 139 of file LayersFwd.hpp.

◆ LayerEnumOf() [32/74]

constexpr LayerType armnn::LayerEnumOf ( const InstanceNormalizationLayer )

Definition at line 140 of file LayersFwd.hpp.

◆ LayerEnumOf() [33/74]

constexpr LayerType armnn::LayerEnumOf ( const L2NormalizationLayer )

Definition at line 141 of file LayersFwd.hpp.

◆ LayerEnumOf() [34/74]

constexpr LayerType armnn::LayerEnumOf ( const LogicalBinaryLayer )

Definition at line 142 of file LayersFwd.hpp.

◆ LayerEnumOf() [35/74]

constexpr LayerType armnn::LayerEnumOf ( const LogSoftmaxLayer )

Definition at line 143 of file LayersFwd.hpp.

◆ LayerEnumOf() [36/74]

constexpr LayerType armnn::LayerEnumOf ( const LstmLayer )

Definition at line 144 of file LayersFwd.hpp.

◆ LayerEnumOf() [37/74]

constexpr LayerType armnn::LayerEnumOf ( const MapLayer )

Definition at line 145 of file LayersFwd.hpp.

◆ LayerEnumOf() [38/74]

constexpr LayerType armnn::LayerEnumOf ( const MaximumLayer )

Definition at line 146 of file LayersFwd.hpp.

◆ LayerEnumOf() [39/74]

constexpr LayerType armnn::LayerEnumOf ( const MeanLayer )

Definition at line 147 of file LayersFwd.hpp.

◆ LayerEnumOf() [40/74]

constexpr LayerType armnn::LayerEnumOf ( const MemCopyLayer )

Definition at line 148 of file LayersFwd.hpp.

◆ LayerEnumOf() [41/74]

constexpr LayerType armnn::LayerEnumOf ( const MemImportLayer )

Definition at line 149 of file LayersFwd.hpp.

◆ LayerEnumOf() [42/74]

constexpr LayerType armnn::LayerEnumOf ( const MergeLayer )

Definition at line 150 of file LayersFwd.hpp.

◆ LayerEnumOf() [43/74]

constexpr LayerType armnn::LayerEnumOf ( const MinimumLayer )

Definition at line 151 of file LayersFwd.hpp.

◆ LayerEnumOf() [44/74]

constexpr LayerType armnn::LayerEnumOf ( const MultiplicationLayer )

Definition at line 152 of file LayersFwd.hpp.

◆ LayerEnumOf() [45/74]

constexpr LayerType armnn::LayerEnumOf ( const NormalizationLayer )

Definition at line 153 of file LayersFwd.hpp.

◆ LayerEnumOf() [46/74]

constexpr LayerType armnn::LayerEnumOf ( const OutputLayer )

Definition at line 154 of file LayersFwd.hpp.

◆ LayerEnumOf() [47/74]

constexpr LayerType armnn::LayerEnumOf ( const PadLayer )

Definition at line 155 of file LayersFwd.hpp.

◆ LayerEnumOf() [48/74]

constexpr LayerType armnn::LayerEnumOf ( const PermuteLayer )

Definition at line 156 of file LayersFwd.hpp.

◆ LayerEnumOf() [49/74]

constexpr LayerType armnn::LayerEnumOf ( const Pooling2dLayer )

Definition at line 157 of file LayersFwd.hpp.

◆ LayerEnumOf() [50/74]

constexpr LayerType armnn::LayerEnumOf ( const Pooling3dLayer )

Definition at line 158 of file LayersFwd.hpp.

◆ LayerEnumOf() [51/74]

constexpr LayerType armnn::LayerEnumOf ( const PreCompiledLayer )

Definition at line 159 of file LayersFwd.hpp.

◆ LayerEnumOf() [52/74]

constexpr LayerType armnn::LayerEnumOf ( const PreluLayer )

Definition at line 160 of file LayersFwd.hpp.

◆ LayerEnumOf() [53/74]

constexpr LayerType armnn::LayerEnumOf ( const QuantizeLayer )

Definition at line 161 of file LayersFwd.hpp.

◆ LayerEnumOf() [54/74]

constexpr LayerType armnn::LayerEnumOf ( const QLstmLayer )

Definition at line 162 of file LayersFwd.hpp.

◆ LayerEnumOf() [55/74]

constexpr LayerType armnn::LayerEnumOf ( const QuantizedLstmLayer )

Definition at line 163 of file LayersFwd.hpp.

◆ LayerEnumOf() [56/74]

constexpr LayerType armnn::LayerEnumOf ( const RankLayer )

Definition at line 164 of file LayersFwd.hpp.

◆ LayerEnumOf() [57/74]

constexpr LayerType armnn::LayerEnumOf ( const ReduceLayer )

Definition at line 165 of file LayersFwd.hpp.

◆ LayerEnumOf() [58/74]

constexpr LayerType armnn::LayerEnumOf ( const ReshapeLayer )

Definition at line 166 of file LayersFwd.hpp.

◆ LayerEnumOf() [59/74]

constexpr LayerType armnn::LayerEnumOf ( const ResizeLayer )

Definition at line 167 of file LayersFwd.hpp.

◆ LayerEnumOf() [60/74]

constexpr LayerType armnn::LayerEnumOf ( const ShapeLayer )

Definition at line 168 of file LayersFwd.hpp.

◆ LayerEnumOf() [61/74]

constexpr LayerType armnn::LayerEnumOf ( const SliceLayer )

Definition at line 169 of file LayersFwd.hpp.

◆ LayerEnumOf() [62/74]

constexpr LayerType armnn::LayerEnumOf ( const SoftmaxLayer )

Definition at line 170 of file LayersFwd.hpp.

◆ LayerEnumOf() [63/74]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToBatchNdLayer )

Definition at line 171 of file LayersFwd.hpp.

◆ LayerEnumOf() [64/74]

constexpr LayerType armnn::LayerEnumOf ( const SpaceToDepthLayer )

Definition at line 172 of file LayersFwd.hpp.

◆ LayerEnumOf() [65/74]

constexpr LayerType armnn::LayerEnumOf ( const SplitterLayer )

Definition at line 173 of file LayersFwd.hpp.

◆ LayerEnumOf() [66/74]

constexpr LayerType armnn::LayerEnumOf ( const StackLayer )

Definition at line 174 of file LayersFwd.hpp.

◆ LayerEnumOf() [67/74]

constexpr LayerType armnn::LayerEnumOf ( const StandInLayer )

Definition at line 175 of file LayersFwd.hpp.

◆ LayerEnumOf() [68/74]

constexpr LayerType armnn::LayerEnumOf ( const StridedSliceLayer )

Definition at line 176 of file LayersFwd.hpp.

◆ LayerEnumOf() [69/74]

constexpr LayerType armnn::LayerEnumOf ( const SubtractionLayer )

Definition at line 177 of file LayersFwd.hpp.

◆ LayerEnumOf() [70/74]

constexpr LayerType armnn::LayerEnumOf ( const SwitchLayer )

Definition at line 178 of file LayersFwd.hpp.

◆ LayerEnumOf() [71/74]

constexpr LayerType armnn::LayerEnumOf ( const TransposeLayer )

Definition at line 179 of file LayersFwd.hpp.

◆ LayerEnumOf() [72/74]

constexpr LayerType armnn::LayerEnumOf ( const TransposeConvolution2dLayer )

Definition at line 180 of file LayersFwd.hpp.

◆ LayerEnumOf() [73/74]

constexpr LayerType armnn::LayerEnumOf ( const UnidirectionalSequenceLstmLayer )

Definition at line 181 of file LayersFwd.hpp.

◆ LayerEnumOf() [74/74]

constexpr LayerType armnn::LayerEnumOf ( const UnmapLayer )

Definition at line 182 of file LayersFwd.hpp.

◆ LevelToString()

std::string armnn::LevelToString ( LogSeverity  level)
inline

Definition at line 15 of file Logging.hpp.

References Debug, Error, Fatal, Info, Trace, and Warning.

Referenced by ScopedRecord::ScopedRecord().

16 {
17  switch(level)
18  {
19  case LogSeverity::Trace:
20  return "Trace";
21  case LogSeverity::Debug:
22  return "Debug";
23  case LogSeverity::Info:
24  return "Info";
25  case LogSeverity::Warning:
26  return "Warning";
27  case LogSeverity::Error:
28  return "Error";
29  case LogSeverity::Fatal:
30  return "Fatal";
31  default:
32  return "Log";
33  }
34 }
void Debug(const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
Definition: Debug.cpp:19
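
A one-line usage example follows directly from the switch above:

std::string label = armnn::LevelToString(armnn::LogSeverity::Warning); // "Warning"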

◆ LogSoftmax()

void LogSoftmax ( Decoder< float > &  input,
Encoder< float > &  output,
const TensorInfo inputInfo,
const LogSoftmaxDescriptor descriptor 
)

Definition at line 29 of file LogSoftmax.cpp.

References ARMNN_ASSERT_MSG, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), IgnoreUnused(), SoftmaxDescriptor::m_Axis, SoftmaxDescriptor::m_Beta, numeric_cast(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

33 {
34  const unsigned int numDimensions = inputInfo.GetNumDimensions();
35 
36  bool axisIsValid = ValidateAxis(descriptor.m_Axis, numDimensions);
37  ARMNN_ASSERT_MSG(axisIsValid,
38  "Axis index is not in range [-numDimensions, numDimensions).");
39  IgnoreUnused(axisIsValid);
40 
41  unsigned int uAxis = descriptor.m_Axis < 0 ?
42  numDimensions - armnn::numeric_cast<unsigned int>(std::abs(descriptor.m_Axis)) :
43  armnn::numeric_cast<unsigned int>(descriptor.m_Axis);
44 
45  const TensorShape& inputShape = inputInfo.GetShape();
46  const unsigned int outerSize = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
47  const unsigned int axisSize = inputShape[uAxis];
48  const unsigned int innerSize = armnnUtils::GetNumElementsBetween(inputShape,
49  uAxis + 1,
50  inputShape.GetNumDimensions());
51 
52  for (unsigned int outer = 0; outer < outerSize; ++outer)
53  {
54  for (unsigned int inner = 0; inner < innerSize; ++inner)
55  {
56  // Find max
57  input[outer * axisSize * innerSize + inner];
58  float maxValue = input.Get();
59  for (unsigned int i = 1u; i < axisSize; ++i)
60  {
61  input[(outer * axisSize + i) * innerSize + inner];
62  maxValue = std::max(maxValue, input.Get());
63  }
64 
65  // Compute sum
66  float sum = 0.0f;
67  for (unsigned int i = 0u; i < axisSize; ++i)
68  {
69  input[(outer * axisSize + i) * innerSize + inner];
70  sum += std::exp((input.Get() - maxValue) * descriptor.m_Beta);
71  }
72 
73  // Compute log sum
74  const float logSum = std::log(sum);
75 
76  // Compute result
77  for (unsigned int i = 0u; i < axisSize; ++i)
78  {
79  const unsigned int index = (outer * axisSize + i) * innerSize + inner;
80 
81  input [index];
82  output[index];
83 
84  output.Set((input.Get() - maxValue) * descriptor.m_Beta - logSum);
85  }
86  }
87  }
88 }
unsigned int GetNumElementsBetween(const armnn::TensorShape &shape, unsigned int firstAxisInclusive, unsigned int lastAxisExclusive)
virtual void Set(IType right)=0
void IgnoreUnused(Ts &&...)
virtual IType Get() const =0
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
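
In mathematical terms, with m = \max_j x_j taken along the selected axis and \beta = descriptor.m_Beta, the loops above compute for each element x_i of that slice:

y_i = \beta (x_i - m) - \log \sum_j \exp(\beta (x_j - m))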

◆ LowerString()

std::string armnn::LowerString ( std::string  value)

Definition at line 62 of file ClBackendContext.cpp.

63 {
64  std::transform(value.begin(), value.end(), value.begin(),
65  [](unsigned char c){ return std::tolower(c); });
66 
67  return value;
68 }
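
A one-line example (the option name is illustrative):

std::string key = armnn::LowerString("FastMathEnabled"); // "fastmathenabled"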

◆ LstmImpl()

void LstmImpl ( const LstmDescriptor descriptor,
const TensorInfo inputInfo,
const TensorInfo outputInfo,
const TensorShape inputToOutputWeightsShape,
const TensorShape recurrentToOutputWeightsShape,
std::unique_ptr< Decoder< float >> &  inputData,
std::unique_ptr< Decoder< float >> &  outputStateIn,
std::unique_ptr< Decoder< float >> &  cellStateIn,
std::unique_ptr< Encoder< float >> &  outputStateOut,
std::unique_ptr< Encoder< float >> &  cellStateOut,
std::unique_ptr< Encoder< float >> &  output,
std::unique_ptr< Decoder< float >> &  cellStateOutDecoder,
std::unique_ptr< Decoder< float >> &  outputDecoder,
std::unique_ptr< Decoder< float >> &  inputToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToCellWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToCellWeightsTensor,
std::unique_ptr< Decoder< float >> &  recurrentToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToInputWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToForgetWeightsTensor,
std::unique_ptr< Decoder< float >> &  cellToOutputWeightsTensor,
std::unique_ptr< Decoder< float >> &  inputGateBiasTensor,
std::unique_ptr< Decoder< float >> &  forgetGateBiasTensor,
std::unique_ptr< Decoder< float >> &  cellBiasTensor,
std::unique_ptr< Decoder< float >> &  outputGateBiasTensor,
std::unique_ptr< Decoder< float >> &  projectionWeightsTensor,
std::unique_ptr< Decoder< float >> &  projectionBiasTensor,
std::unique_ptr< Decoder< float >> &  inputLayerNormWeights,
std::unique_ptr< Decoder< float >> &  forgetLayerNormWeights,
std::unique_ptr< Decoder< float >> &  cellLayerNormWeights,
std::unique_ptr< Decoder< float >> &  outputLayerNormWeights,
std::unique_ptr< Encoder< float >> &  inputGateScratch,
std::unique_ptr< Encoder< float >> &  cellScratch,
std::unique_ptr< Encoder< float >> &  forgetGateScratch,
std::unique_ptr< Encoder< float >> &  outputGateScratch,
std::unique_ptr< Decoder< float >> &  inputGateScratchDecoder,
std::unique_ptr< Decoder< float >> &  cellScratchDecoder,
std::unique_ptr< Decoder< float >> &  forgetGateScratchDecoder,
std::unique_ptr< Decoder< float >> &  outputGateScratchDecoder,
float  layerNormEpsilon 
)

Definition at line 13 of file Lstm.cpp.

References Activation(), ClipVector(), CopyVector(), TensorInfo::GetDataType(), TensorInfo::GetShape(), LstmDescriptor::m_ActivationFunc, LstmDescriptor::m_CifgEnabled, LstmDescriptor::m_ClippingThresCell, LstmDescriptor::m_ClippingThresProj, LstmDescriptor::m_LayerNormEnabled, LstmDescriptor::m_PeepholeEnabled, LstmDescriptor::m_ProjectionEnabled, MatrixBatchVectorMultiplyAccumulate(), MeanStddevNormalization(), SetActivationParameters(), Sigmoid, Sub1Vector(), VectorBatchVectorAdd(), VectorBatchVectorAssign(), VectorBatchVectorCwiseProduct(), VectorBatchVectorCwiseProductAccumulate(), VectorVectorCwiseProduct(), VectorVectorCwiseProductAccumulate(), and ZeroVector().

Referenced by RefLstmWorkload::ExecuteAsync(), and RefUnidirectionalSequenceLstmWorkload::ExecuteAsync().

56 {
57  // This is a porting of the LSTM::Eval() method in the Android code base
58  // Refer to: android/frameworks/ml/nn/common/operations/LSTM.cpp
59 
60  const TensorShape& inputShape = inputInfo.GetShape();
61  const DataType& outputType = outputInfo.GetDataType();
62 
63  const uint32_t nBatch = inputShape[0];
64  const uint32_t nInput = inputShape[1];
65 
66  const uint32_t nCell = inputToOutputWeightsShape[0];
67  const uint32_t nOutput = recurrentToOutputWeightsShape[1];
68 
69  const bool useCifg = descriptor.m_CifgEnabled;
70  const bool usePeephole = descriptor.m_PeepholeEnabled;
71  const bool useLayerNorm = descriptor.m_LayerNormEnabled;
72 
73  if (!useLayerNorm)
74  {
75  // Initialize scratch buffers with bias.
76  if (!useCifg)
77  {
78  VectorBatchVectorAssign(*inputGateBiasTensor,
79  nCell, nBatch, *inputGateScratch);
80  }
81  VectorBatchVectorAssign(*forgetGateBiasTensor,
82  nCell, nBatch, *forgetGateScratch);
83  VectorBatchVectorAssign(*cellBiasTensor,
84  nCell, nBatch, *cellScratch);
85  VectorBatchVectorAssign(*outputGateBiasTensor,
86  nCell, nBatch, *outputGateScratch);
87  }
88  else
89  {
90  // Initialize scratch buffers with zeroes.
91  if (!useCifg)
92  {
93  ZeroVector(*inputGateScratch, nCell * nBatch);
94  }
95  ZeroVector(*forgetGateScratch, nCell * nBatch);
96  ZeroVector(*cellScratch , nCell * nBatch);
97  ZeroVector(*outputGateScratch, nCell * nBatch);
98  }
99 
100  // For each batch and cell: compute input_weight * input.
101  if (!useCifg)
102  {
103  MatrixBatchVectorMultiplyAccumulate(*inputToInputWeightsTensor,
104  nCell, nInput, *inputData, nBatch, *inputGateScratch);
105  }
106  MatrixBatchVectorMultiplyAccumulate(*inputToForgetWeightsTensor,
107  nCell, nInput, *inputData, nBatch, *forgetGateScratch);
108  MatrixBatchVectorMultiplyAccumulate(*inputToCellWeightsTensor,
109  nCell, nInput, *inputData, nBatch, *cellScratch);
110  MatrixBatchVectorMultiplyAccumulate(*inputToOutputWeightsTensor,
111  nCell, nInput, *inputData, nBatch, *outputGateScratch);
112 
113  // For each batch and cell: compute recurrent_weight * output_state.
114  if (!useCifg)
115  {
116  MatrixBatchVectorMultiplyAccumulate(*recurrentToInputWeightsTensor,
117  nCell, nOutput, *outputStateIn, nBatch, *inputGateScratch);
118  }
119  MatrixBatchVectorMultiplyAccumulate(*recurrentToForgetWeightsTensor,
120  nCell, nOutput, *outputStateIn, nBatch, *forgetGateScratch);
121  MatrixBatchVectorMultiplyAccumulate(*recurrentToCellWeightsTensor,
122  nCell, nOutput, *outputStateIn, nBatch, *cellScratch);
123  MatrixBatchVectorMultiplyAccumulate(*recurrentToOutputWeightsTensor,
124  nCell, nOutput, *outputStateIn, nBatch, *outputGateScratch);
125 
126  // For each batch and cell: update input gate.
127  if (!useCifg)
128  {
129  if (usePeephole)
130  {
131  VectorBatchVectorCwiseProductAccumulate(*cellToInputWeightsTensor,
132  nCell, *cellStateIn, nBatch, *inputGateScratch);
133  }
134  if (useLayerNorm)
135  {
136  MeanStddevNormalization(*inputGateScratchDecoder,
137  *inputGateScratch, nCell, nBatch, layerNormEpsilon);
138  VectorBatchVectorCwiseProduct(*inputLayerNormWeights,
139  nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
140  VectorBatchVectorAdd(*inputGateBiasTensor,
141  nCell, *inputGateScratchDecoder, nBatch, *inputGateScratch);
142  }
143  Activation(*inputGateScratchDecoder, *inputGateScratch,
144  TensorInfo({nCell, nBatch}, outputType),
145  ActivationFunction::Sigmoid, 0, 0);
146  }
147 
148  // For each batch and cell: update forget gate.
149  if (usePeephole)
150  {
151  VectorBatchVectorCwiseProductAccumulate(*cellToForgetWeightsTensor, nCell,
152  *cellStateIn, nBatch, *forgetGateScratch);
153  }
154  if (useLayerNorm)
155  {
156  MeanStddevNormalization(*forgetGateScratchDecoder,
157  *forgetGateScratch, nCell, nBatch, layerNormEpsilon);
158  VectorBatchVectorCwiseProduct(*forgetLayerNormWeights,
159  nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
160  VectorBatchVectorAdd(*forgetGateBiasTensor,
161  nCell, *forgetGateScratchDecoder, nBatch, *forgetGateScratch);
162  }
163  Activation(*forgetGateScratchDecoder, *forgetGateScratch,
164  TensorInfo({nCell, nBatch}, outputType),
165  ActivationFunction::Sigmoid, 0, 0);
166 
167  // For each batch and cell: update the cell.
168  if (useLayerNorm)
169  {
170  MeanStddevNormalization(*cellScratchDecoder,
171  *cellScratch, nCell, nBatch, layerNormEpsilon);
172  VectorBatchVectorCwiseProduct(*cellLayerNormWeights,
173  nCell, *cellScratchDecoder, nBatch, *cellScratch);
174  VectorBatchVectorAdd(*cellBiasTensor,
175  nCell, *cellScratchDecoder, nBatch, *cellScratch);
176  }
177 
178  VectorVectorCwiseProduct(*forgetGateScratchDecoder, *cellStateIn, nBatch * nCell, *cellStateOut);
179 
180  ActivationFunction armnnActivationFunc = ActivationFunction::Sigmoid;
181  float a = 0;
182  float b = 0;
183  SetActivationParameters(descriptor.m_ActivationFunc, armnnActivationFunc, a, b);
184 
185  if (descriptor.m_ActivationFunc > 0)
186  {
187  Activation(*cellScratchDecoder, *cellScratch,
188  TensorInfo({nCell, nBatch}, outputType),
189  armnnActivationFunc, a, b);
190  }
191  if (useCifg)
192  {
193  Sub1Vector(*forgetGateScratchDecoder, nBatch * nCell, *forgetGateScratch);
194  VectorVectorCwiseProductAccumulate(
195  *cellScratchDecoder, *forgetGateScratchDecoder, nBatch * nCell, *cellStateOut);
196  }
197  else
198  {
199  VectorVectorCwiseProductAccumulate(
200  *cellScratchDecoder, *inputGateScratchDecoder, nBatch * nCell, *cellStateOut);
201  }
202  if (descriptor.m_ClippingThresCell > 0.0)
203  {
204  ClipVector(*cellStateOutDecoder, nBatch * nCell, descriptor.m_ClippingThresCell, *cellStateOut);
205  }
206 
207  // For each batch and cell: update the output gate.
208  if (usePeephole)
209  {
210  VectorBatchVectorCwiseProductAccumulate(*cellToOutputWeightsTensor,
211  nCell, *cellStateOutDecoder, nBatch, *outputGateScratch);
212  }
213  if (useLayerNorm)
214  {
215  MeanStddevNormalization(*outputGateScratchDecoder,
216  *outputGateScratch, nCell, nBatch, layerNormEpsilon);
217  VectorBatchVectorCwiseProduct(*outputLayerNormWeights,
218  nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
219  VectorBatchVectorAdd(*outputGateBiasTensor,
220  nCell, *outputGateScratchDecoder, nBatch, *outputGateScratch);
221  }
222  Activation(*outputGateScratchDecoder, *outputGateScratch,
223  TensorInfo({nCell, nBatch}, outputType),
224  ActivationFunction::Sigmoid, 0, 0);
225 
226  if (descriptor.m_ActivationFunc > 0)
227  {
228  Activation(*cellStateOutDecoder, *cellScratch,
229  TensorInfo({nCell, nBatch}, outputType),
230  armnnActivationFunc, a, b);
231  }
232 
233  VectorVectorCwiseProduct(*outputGateScratchDecoder, *cellScratchDecoder, nBatch * nCell, *outputGateScratch);
234 
235  // For each batch: update the projection and output_state.
236  if (descriptor.m_ProjectionEnabled)
237  {
238  if (projectionBiasTensor)
239  {
240  VectorBatchVectorAssign(*projectionBiasTensor,
241  nOutput, nBatch, *output);
242  }
243  MatrixBatchVectorMultiplyAccumulate(*projectionWeightsTensor,
244  nOutput, nCell, *outputGateScratchDecoder, nBatch, *output);
245 
246  if (descriptor.m_ClippingThresProj > 0.0)
247  {
248  ClipVector(*outputDecoder, nBatch * nOutput, descriptor.m_ClippingThresProj, *output);
249  }
250  }
251  else
252  {
253  CopyVector(*outputGateScratchDecoder, nBatch * nOutput, *output);
254  }
255 
256  CopyVector(*outputDecoder, nBatch * nOutput, *outputStateOut);
257 }
void MeanStddevNormalization(armnn::Decoder< float > &input_vector, armnn::Encoder< float > &output_vector, uint32_t v_size, uint32_t n_batch, float normalization_epsilon)
Definition: LstmUtils.cpp:40
void VectorBatchVectorAdd(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:16
void ClipVector(armnn::Decoder< float > &vector, uint32_t vSize, float absLimit, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:229
void Sub1Vector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &result)
Definition: LstmUtils.cpp:173
void CopyVector(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:244
void VectorBatchVectorCwiseProductAccumulate(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:131
void ZeroVector(armnn::Encoder< float > &vector, uint32_t vSize)
Definition: LstmUtils.cpp:76
void VectorVectorCwiseProduct(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:187
void VectorBatchVectorCwiseProduct(armnn::Decoder< float > &vector, uint32_t vSize, armnn::Decoder< float > &batchVector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:152
void MatrixBatchVectorMultiplyAccumulate(armnn::Decoder< float > &matrix, uint32_t mRows, uint32_t mCols, armnn::Decoder< float > &vector, uint32_t nBatch, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:87
DataType
Definition: Types.hpp:48
float Activation(float in, ActivationFunction function, float a, float b)
Definition: Activation.cpp:13
void VectorVectorCwiseProductAccumulate(armnn::Decoder< float > &vector1, armnn::Decoder< float > &vector2, uint32_t vSize, armnn::Encoder< float > &outResult)
Definition: LstmUtils.cpp:204
void VectorBatchVectorAssign(armnn::Decoder< float > &vector, uint32_t vSize, uint32_t nBatch, armnn::Encoder< float > &outBatchVector)
Definition: LstmUtils.cpp:113
void SetActivationParameters(uint32_t activation, armnn::ActivationFunction &outArmnnActivation, float &outA, float &outB)
Definition: LstmUtils.cpp:258
ActivationFunction
Definition: Types.hpp:86
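
As a hedged summary in standard LSTM notation (not quoted from the source), the path without layer normalisation evaluates the usual gate equations; the bracketed peephole terms apply only when m_PeepholeEnabled is set, i_t is replaced by 1 - f_t when CIFG is enabled, g is the activation selected by m_ActivationFunc, and the projection and clipping steps at the end are optional:

i_t = \sigma(W_{xi} x_t + W_{hi} h_{t-1} [+ w_{ci} \odot c_{t-1}] + b_i)
f_t = \sigma(W_{xf} x_t + W_{hf} h_{t-1} [+ w_{cf} \odot c_{t-1}] + b_f)
g_t = g(W_{xc} x_t + W_{hc} h_{t-1} + b_c)
c_t = f_t \odot c_{t-1} + i_t \odot g_t
o_t = \sigma(W_{xo} x_t + W_{ho} h_{t-1} [+ w_{co} \odot c_t] + b_o)
h_t = o_t \odot g(c_t)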

◆ MakeDecoder() [1/4]

std::unique_ptr<Decoder<T> > armnn::MakeDecoder ( const TensorInfo info,
const void *  data = nullptr 
)
inline

Definition at line 66 of file Decoders.hpp.

References ARMNN_ASSERT_MSG, BFloat16, Boolean, Float16, Float32, TensorInfo::GetDataType(), armnnUtils::GetPerAxisParams(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::HasPerAxisQuantization(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

67 {
68  switch(info.GetDataType())
69  {
70  case DataType::QAsymmS8:
71  {
72  return std::make_unique<QASymmS8Decoder>(
73  static_cast<const int8_t*>(data),
74  info.GetQuantizationScale(),
75  info.GetQuantizationOffset());
76  }
77  case DataType::QAsymmU8:
78  {
79  return std::make_unique<QASymm8Decoder>(
80  static_cast<const uint8_t*>(data),
81  info.GetQuantizationScale(),
82  info.GetQuantizationOffset());
83  }
84  case DataType::QSymmS16:
85  {
86  return std::make_unique<QSymm16Decoder>(
87  static_cast<const int16_t*>(data),
88  info.GetQuantizationScale(),
89  info.GetQuantizationOffset());
90  }
91  case DataType::BFloat16:
92  {
93  return std::make_unique<BFloat16Decoder>(static_cast<const BFloat16*>(data));
94  }
95  case DataType::Float16:
96  {
97  return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
98  }
99  case DataType::Float32:
100  {
101  return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
102  }
103  case DataType::Signed32:
104  {
105  return MakeSigned32Decoder(info, data);
106  }
107  case DataType::QSymmS8:
108  {
109  if (info.HasPerAxisQuantization())
110  {
111  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
112  return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
113  }
114  else
115  {
116  return std::make_unique<QSymmS8Decoder>(
117  static_cast<const int8_t*>(data),
118  info.GetQuantizationScale(),
119  info.GetQuantizationOffset());
120  }
121  }
122  case armnn::DataType::Boolean:
123  {
124  return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
125  }
126  default:
127  {
128  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
129  break;
130  }
131  }
132  return nullptr;
133 }
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
half_float::half Half
Definition: Half.hpp:18
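
A hedged usage sketch: tensorInfo and rawData are assumed names for an existing TensorInfo and the raw buffer it describes; the position-then-Get() pattern is the same one used by LogSoftmax() above.

// tensorInfo/rawData are assumed inputs, not part of this API.
std::unique_ptr<armnn::Decoder<float>> decoder = armnn::MakeDecoder<float>(tensorInfo, rawData);
for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i)
{
    (*decoder)[i];                // move the decoder to element i
    float value = decoder->Get(); // element i converted/dequantized to float
}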

◆ MakeDecoder() [2/4]

std::unique_ptr<Decoder<float> > armnn::MakeDecoder ( const TensorInfo info,
const void *  data 
)
inline

Definition at line 66 of file Decoders.hpp.

References ARMNN_ASSERT_MSG, BFloat16, Boolean, Float16, Float32, TensorInfo::GetDataType(), armnnUtils::GetPerAxisParams(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::HasPerAxisQuantization(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

67 {
68  switch(info.GetDataType())
69  {
70  case DataType::QAsymmS8:
71  {
72  return std::make_unique<QASymmS8Decoder>(
73  static_cast<const int8_t*>(data),
74  info.GetQuantizationScale(),
75  info.GetQuantizationOffset());
76  }
77  case DataType::QAsymmU8:
78  {
79  return std::make_unique<QASymm8Decoder>(
80  static_cast<const uint8_t*>(data),
81  info.GetQuantizationScale(),
82  info.GetQuantizationOffset());
83  }
84  case DataType::QSymmS16:
85  {
86  return std::make_unique<QSymm16Decoder>(
87  static_cast<const int16_t*>(data),
88  info.GetQuantizationScale(),
89  info.GetQuantizationOffset());
90  }
91  case DataType::BFloat16:
92  {
93  return std::make_unique<BFloat16Decoder>(static_cast<const BFloat16*>(data));
94  }
95  case DataType::Float16:
96  {
97  return std::make_unique<Float16Decoder>(static_cast<const Half*>(data));
98  }
99  case DataType::Float32:
100  {
101  return std::make_unique<Float32Decoder>(static_cast<const float*>(data));
102  }
103  case DataType::Signed32:
104  {
105  return MakeSigned32Decoder(info, data);
106  }
107  case DataType::QSymmS8:
108  {
109  if (info.HasPerAxisQuantization())
110  {
111  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
112  return std::make_unique<QSymm8PerAxisDecoder>(static_cast<const int8_t*>(data), info);
113  }
114  else
115  {
116  return std::make_unique<QSymmS8Decoder>(
117  static_cast<const int8_t*>(data),
118  info.GetQuantizationScale(),
119  info.GetQuantizationOffset());
120  }
121  }
122  case armnn::DataType::Boolean:
123  {
124  return std::make_unique<BooleanDecoder>(static_cast<const uint8_t*>(data));
125  }
126  default:
127  {
128  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
129  break;
130  }
131  }
132  return nullptr;
133 }
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
half_float::half Half
Definition: Half.hpp:18

◆ MakeDecoder() [3/4]

std::unique_ptr<Decoder<bool> > armnn::MakeDecoder ( const TensorInfo info,
const void *  data 
)
inline

Definition at line 136 of file Decoders.hpp.

References ARMNN_ASSERT_MSG, Boolean, and TensorInfo::GetDataType().

137 {
138  switch(info.GetDataType())
139  {
140  case DataType::Boolean:
141  {
142  return std::make_unique<BooleanDecoderBool>(static_cast<const uint8_t*>(data));
143  }
144  default:
145  {
146  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
147  break;
148  }
149  }
150  return nullptr;
151 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ MakeDecoder() [4/4]

std::unique_ptr<Decoder<int32_t> > armnn::MakeDecoder ( const TensorInfo info,
const void *  data 
)
inline

Definition at line 154 of file Decoders.hpp.

References ARMNN_ASSERT_MSG, TensorInfo::GetDataType(), and Signed32.

155 {
156  switch(info.GetDataType())
157  {
158  case DataType::Signed32:
159  {
160  return std::make_unique<Int32ToInt32tDecoder>(static_cast<const int32_t*>(data));
161  }
162  default:
163  {
164  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
165  break;
166  }
167  }
168  return nullptr;
169 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ MakeEncoder() [1/4]

std::unique_ptr<Encoder<T> > armnn::MakeEncoder ( const TensorInfo info,
void *  data = nullptr 
)
inline

Definition at line 21 of file Encoders.hpp.

References ARMNN_ASSERT_MSG, BFloat16, Boolean, Float16, Float32, TensorInfo::GetDataType(), armnnUtils::GetPerAxisParams(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::HasPerAxisQuantization(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

22 {
23  switch(info.GetDataType())
24  {
25  case armnn::DataType::QAsymmS8:
26  {
27  return std::make_unique<QASymmS8Encoder>(
28  static_cast<int8_t*>(data),
29  info.GetQuantizationScale(),
30  info.GetQuantizationOffset());
31  }
32  case armnn::DataType::QAsymmU8:
33  {
34  return std::make_unique<QASymm8Encoder>(
35  static_cast<uint8_t*>(data),
36  info.GetQuantizationScale(),
37  info.GetQuantizationOffset());
38  }
39  case DataType::QSymmS8:
40  {
41  if (info.HasPerAxisQuantization())
42  {
43  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
44  return std::make_unique<QSymm8PerAxisEncoder>(
45  static_cast<int8_t*>(data),
46  params.second,
47  params.first);
48  }
49  else
50  {
51  return std::make_unique<QSymmS8Encoder>(
52  static_cast<int8_t*>(data),
53  info.GetQuantizationScale(),
54  info.GetQuantizationOffset());
55  }
56  }
57  case armnn::DataType::QSymmS16:
58  {
59  return std::make_unique<QSymm16Encoder>(
60  static_cast<int16_t*>(data),
61  info.GetQuantizationScale(),
62  info.GetQuantizationOffset());
63  }
64  case armnn::DataType::Signed32:
65  {
66  return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
67  }
68  case armnn::DataType::BFloat16:
69  {
70  return std::make_unique<BFloat16Encoder>(static_cast<armnn::BFloat16*>(data));
71  }
72  case armnn::DataType::Float16:
73  {
74  return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
75  }
76  case armnn::DataType::Float32:
77  {
78  return std::make_unique<Float32Encoder>(static_cast<float*>(data));
79  }
80  default:
81  {
82  ARMNN_ASSERT_MSG(false, "Unsupported target Data Type!");
83  break;
84  }
85  }
86  return nullptr;
87 }
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
half_float::half Half
Definition: Half.hpp:18
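
A typical usage pattern, mirroring the element-wise copy in MirrorPad() below, pairs a Decoder with an Encoder so that tensor data can be read and written independently of the underlying DataType. A minimal sketch (the TensorInfo objects and mapped ITensorHandle pointers are assumed to come from the surrounding workload):

    std::unique_ptr<armnn::Decoder<float>> decoder = armnn::MakeDecoder<float>(inputInfo, inputHandle->Map());
    std::unique_ptr<armnn::Encoder<float>> encoder = armnn::MakeEncoder<float>(outputInfo, outputHandle->Map());

    for (unsigned int i = 0; i < inputInfo.GetNumElements(); ++i)
    {
        (*decoder)[i];                // position the decoder on element i
        (*encoder)[i];                // position the encoder on element i
        encoder->Set(decoder->Get()); // read as float, write as float (de/requantizing if needed)
    }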

◆ MakeEncoder() [2/4]

std::unique_ptr<Encoder<float> > armnn::MakeEncoder ( const TensorInfo info,
void *  data 
)
inline

Definition at line 21 of file Encoders.hpp.

References ARMNN_ASSERT_MSG, BFloat16, Float16, Float32, TensorInfo::GetDataType(), armnnUtils::GetPerAxisParams(), TensorInfo::GetQuantizationOffset(), TensorInfo::GetQuantizationScale(), TensorInfo::HasPerAxisQuantization(), QAsymmS8, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

22 {
23  switch(info.GetDataType())
24  {
 25  case DataType::QAsymmS8:
 26  {
27  return std::make_unique<QASymmS8Encoder>(
28  static_cast<int8_t*>(data),
29  info.GetQuantizationScale(),
30  info.GetQuantizationOffset());
31  }
 32  case DataType::QAsymmU8:
 33  {
34  return std::make_unique<QASymm8Encoder>(
35  static_cast<uint8_t*>(data),
36  info.GetQuantizationScale(),
37  info.GetQuantizationOffset());
38  }
39  case DataType::QSymmS8:
40  {
41  if (info.HasPerAxisQuantization())
42  {
43  std::pair<unsigned int, std::vector<float>> params = armnnUtils::GetPerAxisParams(info);
44  return std::make_unique<QSymm8PerAxisEncoder>(
45  static_cast<int8_t*>(data),
46  params.second,
47  params.first);
48  }
49  else
50  {
51  return std::make_unique<QSymmS8Encoder>(
52  static_cast<int8_t*>(data),
53  info.GetQuantizationScale(),
54  info.GetQuantizationOffset());
55  }
56  }
 57  case DataType::QSymmS16:
 58  {
59  return std::make_unique<QSymm16Encoder>(
60  static_cast<int16_t*>(data),
61  info.GetQuantizationScale(),
62  info.GetQuantizationOffset());
63  }
 64  case DataType::Signed32:
 65  {
66  return std::make_unique<Int32Encoder>(static_cast<int32_t*>(data));
67  }
 68  case DataType::BFloat16:
 69  {
70  return std::make_unique<BFloat16Encoder>(static_cast<armnn::BFloat16*>(data));
71  }
 72  case DataType::Float16:
 73  {
74  return std::make_unique<Float16Encoder>(static_cast<Half*>(data));
75  }
 76  case DataType::Float32:
 77  {
78  return std::make_unique<Float32Encoder>(static_cast<float*>(data));
79  }
80  default:
81  {
82  ARMNN_ASSERT_MSG(false, "Unsupported target Data Type!");
83  break;
84  }
85  }
86  return nullptr;
87 }
std::pair< unsigned int, std::vector< float > > GetPerAxisParams(const armnn::TensorInfo &info)
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
half_float::half Half
Definition: Half.hpp:18

◆ MakeEncoder() [3/4]

std::unique_ptr<Encoder<bool> > armnn::MakeEncoder ( const TensorInfo info,
void *  data 
)
inline

Definition at line 90 of file Encoders.hpp.

References ARMNN_ASSERT_MSG, Boolean, and TensorInfo::GetDataType().

91 {
92  switch(info.GetDataType())
93  {
 94  case DataType::Boolean:
 95  {
96  return std::make_unique<BooleanEncoder>(static_cast<uint8_t*>(data));
97  }
98  default:
99  {
100  ARMNN_ASSERT_MSG(false, "Cannot encode from boolean. Not supported target Data Type!");
101  break;
102  }
103  }
104  return nullptr;
105 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ MakeEncoder() [4/4]

std::unique_ptr<Encoder<int32_t> > armnn::MakeEncoder ( const TensorInfo info,
void *  data 
)
inline

Definition at line 108 of file Encoders.hpp.

References ARMNN_ASSERT_MSG, TensorInfo::GetDataType(), and Signed32.

109 {
110  switch(info.GetDataType())
111  {
112  case DataType::Signed32:
113  {
114  return std::make_unique<Int32ToInt32tEncoder>(static_cast<int32_t*>(data));
115  }
116  default:
117  {
118  ARMNN_ASSERT_MSG(false, "Unsupported Data Type!");
119  break;
120  }
121  }
122  return nullptr;
123 }
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

◆ MakeInfo()

arm_compute::DetectionPostProcessLayerInfo armnn::MakeInfo ( const DetectionPostProcessDescriptor descriptor)

Definition at line 17 of file NeonDetectionPostProcessWorkload.cpp.

References DetectionPostProcessDescriptor::m_DetectionsPerClass, DetectionPostProcessDescriptor::m_MaxClassesPerDetection, DetectionPostProcessDescriptor::m_MaxDetections, DetectionPostProcessDescriptor::m_NmsIouThreshold, DetectionPostProcessDescriptor::m_NmsScoreThreshold, DetectionPostProcessDescriptor::m_NumClasses, and DetectionPostProcessDescriptor::m_UseRegularNms.

Referenced by NeonDetectionPostProcessValidate().

18 {
19  return arm_compute::DetectionPostProcessLayerInfo(descriptor.m_MaxDetections,
20  descriptor.m_MaxClassesPerDetection,
21  descriptor.m_NmsScoreThreshold,
22  descriptor.m_NmsIouThreshold,
23  descriptor.m_NumClasses,
24  { descriptor.m_ScaleX,
25  descriptor.m_ScaleY,
26  descriptor.m_ScaleW,
27  descriptor.m_ScaleH },
28  descriptor.m_UseRegularNms,
29  descriptor.m_DetectionsPerClass);
30 }

◆ MakeOptimizations()

Optimizer::Optimizations armnn::MakeOptimizations ( Args &&...  args)

Definition at line 43 of file Optimizer.hpp.

References Append().

Referenced by ApplyBackendOptimizations(), Optimize(), and TEST_SUITE().

44 {
45  Optimizer::Optimizations optimizations;
46 
47  Append(optimizations, std::forward<Args>(args)...);
48 
49  return optimizations;
50 }
void Append(Optimizer::Optimizations &optimizations, Front &&front, Others &&... others)
Definition: Optimizer.hpp:36
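
For example, Optimize() uses this helper to bundle several optimization passes into a single Optimizer::Pass() call over a Graph. A minimal sketch (the two pass names are illustrative picks from the armnn::optimizations namespace; any set of compatible passes can be combined the same way):

    using namespace armnn;
    using namespace armnn::optimizations;

    // Run two graph rewrites in one traversal of 'graph'.
    Optimizer::Pass(graph, MakeOptimizations(SquashEqualPermuteSiblings(),
                                             OptimizeInversePermutes()));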

◆ MakeOptional()

Optional<T> armnn::MakeOptional ( Args &&...  args)

Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> object.

Definition at line 305 of file Optional.hpp.

References CONSTRUCT_IN_PLACE.

306 {
307  return Optional<T>(CONSTRUCT_IN_PLACE, std::forward<Args>(args)...);
308 }
#define CONSTRUCT_IN_PLACE
Definition: Optional.hpp:41
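
For instance, an optional bias TensorInfo can be constructed directly inside the Optional instead of being copied in (a minimal sketch; the shape and data type are arbitrary):

    // Construct the TensorInfo in place, avoiding a temporary object.
    armnn::Optional<armnn::TensorInfo> biases =
        armnn::MakeOptional<armnn::TensorInfo>(armnn::TensorShape({ 16 }), armnn::DataType::Float32);

    if (biases.has_value())
    {
        const armnn::TensorInfo& biasInfo = biases.value();
        // ... use biasInfo ...
    }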

◆ MakeTransformIterator()

constexpr TransformIterator<Function, Iterator> armnn::MakeTransformIterator ( Iterator  i,
Function  f 
)

Definition at line 81 of file TransformIterator.hpp.

Referenced by TEST_SUITE().

82 {
83  return TransformIterator<Function, Iterator>(i, f);
84 }
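
A minimal usage sketch, assuming the returned TransformIterator supports the usual increment, comparison and dereference operations (dereferencing applies the supplied function to the underlying element):

    std::vector<int> values = { 1, 2, 3 };
    auto square = [](int v) { return v * v; };

    // Iterate over the vector while squaring each element on the fly.
    auto first = armnn::MakeTransformIterator(values.begin(), square);
    auto last  = armnn::MakeTransformIterator(values.end(), square);

    int sum = 0;
    for (auto it = first; it != last; ++it)
    {
        sum += *it; // accumulates 1 + 4 + 9 = 14
    }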

◆ MirrorPad()

void MirrorPad ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const ITensorHandle inputHandle,
ITensorHandle outputHandle,
const PadQueueDescriptor data 
)

Definition at line 59 of file MirrorPad.cpp.

References TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), PadDescriptor::m_PaddingMode, PadDescriptor::m_PadList, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), Reflect, Encoder< IType >::Set(), and Symmetric.

Referenced by RefPadWorkload::ExecuteAsync().

64 {
65  auto padList = data.m_Parameters.m_PadList;
66  PaddingMode paddingMode = data.m_Parameters.m_PaddingMode;
67 
68  TensorShape outputShape = outputInfo.GetShape();
69  TensorShape inputShape = inputInfo.GetShape();
70 
71  unsigned int numOutputElements = outputInfo.GetNumElements();
72  unsigned int numInputDimensions = inputShape.GetNumDimensions();
73  assert(numInputDimensions == outputShape.GetNumDimensions());
74 
75  // If padding mode is Reflect then both paddings must be no greater than inputShape(i) - 1.
76  // If padding mode is Symmetric then both paddings must be no greater than inputShape(i).
77  const unsigned int isReflect = static_cast<unsigned int>(paddingMode == PaddingMode::Reflect);
78  for(unsigned int i = 0; i < padList.size(); ++i)
79  {
80  if(padList.at(i).first > (inputShape[i] - isReflect) ||
81  padList.at(i).second > (inputShape[i] - isReflect))
82  {
83  throw armnn::InvalidArgumentException("Paddings must be less (Reflect) or "
84  "equal (Symmetric) to the dimension size.");
85  }
86  }
87 
88  auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
89  auto outData = MakeEncoder<float>(outputInfo, outputHandle->Map());
90 
91  Decoder<float>& input = *inputData;
92  Encoder<float>& output = *outData;
93 
94  for(unsigned int idx = 0; idx < numOutputElements; ++idx)
95  {
 96  // Get the coordinates of the current index in vector form. E.g. idx 1 = [ 0, 0, 0, 1 ]
97  const std::vector<unsigned int> coord = IndexToCoord(outputShape, idx);
98 
99  std::vector<unsigned int> dimensions;
100  std::vector<unsigned int> coords;
101 
102  for(unsigned int i = 0; i < numInputDimensions; ++i)
103  {
104  dimensions.emplace_back(i);
105  coords.emplace_back(coord[i]);
106  }
107 
108  auto isInPadding = [&](unsigned int i)
109  {
110  return (coords[i] < padList[i].first || coords[i] > inputShape[i] + padList[i].first - 1);
111  };
112 
113  auto getReflectIndex = [&](unsigned int i) -> unsigned int
114  {
115  if(isInPadding(i))
116  {
117  if(coords[i] < padList[i].first)
118  {
119  return padList[i].first - coords[i];
120  }
121  else
122  {
123  return 2 * inputShape[i] + padList[i].first - 2 - coords[i];
124  }
125  }
126  return coords[i] - padList[i].first;
127  };
128 
129  auto getSymmetricIndex = [&](unsigned int i) -> unsigned int
130  {
131  if(isInPadding(i))
132  {
133  if(coords[i] < padList[i].first)
134  {
135  return padList[i].first - coords[i] - 1;
136  }
137  else
138  {
139  return 2 * inputShape[i] + padList[i].first - 1 - coords[i];
140  }
141  }
142  return coords[i] - padList[i].first;
143  };
144 
145  // Location of the value in the input tensor to use in the output.
146  std::vector<unsigned int> coordOfInput;
147 
148  // any_of works as a loop here to check if any of the dimensions are in the padding.
 149  // If a dimension is in the padding area, then create the coordinates of the location in the
150  // input tensor to use in the output.
151  // E.g.
152  // Input tensor = [ 1, 2, 3 ], Rank = 1.
 153  // Output tensor = [ 2, 1, 2, 3, 2 ] if Reflect or [ 1, 1, 2, 3, 3 ] if Symmetric with a padding of (1, 1).
154  // So it will either return [ 1 ] or [ 0 ] which is used to set the first value in the output tensor and so on.
155  if(std::any_of(dimensions.begin(), dimensions.end(), isInPadding))
156  {
157  switch(paddingMode)
158  {
159  case PaddingMode::Reflect:
160  {
161  for(unsigned int i = 0; i < numInputDimensions; ++i)
162  {
163  coordOfInput.emplace_back(getReflectIndex(i));
164  }
165  break;
166  }
167  case PaddingMode::Symmetric:
168  {
169  for(unsigned int i = 0; i < numInputDimensions; ++i)
170  {
171  coordOfInput.emplace_back(getSymmetricIndex(i));
172  }
173  break;
174  }
175  default:
176  throw InvalidArgumentException("Padding mode not supported.");
177  break;
178  }
179  }
180  else
181  {
182  for(unsigned int i = 0; i < numInputDimensions; ++i)
183  {
184  coordOfInput.emplace_back(coord[i] - padList[i].first);
185  }
186  }
187 
188  // Set output value using the coordinate of the input value to use.
189  const unsigned int indexOfInput = CoordToIndex(inputShape, coordOfInput);
190 
191  input[indexOfInput];
192  auto inputValue = input.Get();
193 
194  output[idx];
195  output.Set(inputValue);
196  }
197 }
PaddingMode
The padding mode controls whether the padding should be filled with constant values (Constant)...
Definition: Types.hpp:186
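
To make the reflect/symmetric index arithmetic above concrete, the following stand-alone sketch reproduces the one-dimensional example from the comments (input { 1, 2, 3 } padded by (1, 1)). The helper is purely illustrative and is not part of the ArmNN API:

    // Hypothetical helper replicating the per-dimension index maths used in MirrorPad().
    unsigned int PaddedSourceIndex(unsigned int outIdx, unsigned int pad,
                                   unsigned int inputSize, bool reflect)
    {
        if (outIdx < pad)                    // left padding region
        {
            return reflect ? pad - outIdx : pad - outIdx - 1;
        }
        if (outIdx > inputSize + pad - 1)    // right padding region
        {
            return reflect ? 2 * inputSize + pad - 2 - outIdx
                           : 2 * inputSize + pad - 1 - outIdx;
        }
        return outIdx - pad;                 // inside the original tensor
    }

    // For input { 1, 2, 3 } and a padding of (1, 1):
    //   Reflect   -> source indices 1, 0, 1, 2, 1 -> output { 2, 1, 2, 3, 2 }
    //   Symmetric -> source indices 0, 0, 1, 2, 2 -> output { 1, 1, 2, 3, 3 }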

◆ MockBackendId()

constexpr const char* armnn::MockBackendId ( )

Definition at line 11 of file MockBackendId.hpp.

Referenced by MockBackend::GetIdStatic(), MockBackend::OptimizeSubgraphView(), and TEST_SUITE().

11 { return "MockAcc"; }

◆ MockImportBackendId()

constexpr const char* armnn::MockImportBackendId ( )

Definition at line 12 of file MockImportBackend.hpp.

Referenced by MockImportBackend::GetIdStatic(), and TEST_SUITE().

12 { return "MockRef"; }

◆ MockTensorHandleFactoryId()

constexpr const char* armnn::MockTensorHandleFactoryId ( )

Definition at line 14 of file MockTensorHandleFactory.hpp.

Referenced by MockTensorHandleFactory::GetIdStatic().

15 {
16  return "Arm/Mock/TensorHandleFactory";
17 }

◆ NeonAbsWorkloadValidate()

arm_compute::Status NeonAbsWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonAbsWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEAbsLayer::validate(&aclInput, &aclOutput);
23 }
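
The Neon*WorkloadValidate() functions in this namespace all follow the same pattern: build arm_compute::TensorInfo descriptions of the ArmNN tensors, then delegate to the static validate() of the corresponding NE* layer. Callers such as the IsXSupported() checks typically only need the resulting status code; a minimal sketch of such a wrapper (the function name and out-parameter are illustrative):

    // Hypothetical wrapper: report whether the Neon Abs workload would accept these tensors.
    bool AbsSupportedOnNeon(const armnn::TensorInfo& input,
                            const armnn::TensorInfo& output,
                            std::string* reasonIfUnsupported)
    {
        arm_compute::Status status = armnn::NeonAbsWorkloadValidate(input, output);

        const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!supported && reasonIfUnsupported != nullptr)
        {
            *reasonIfUnsupported = status.error_description();
        }
        return supported;
    }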

◆ NeonActivationWorkloadValidate()

arm_compute::Status NeonActivationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ActivationDescriptor descriptor 
)

Definition at line 17 of file NeonActivationWorkload.cpp.

Referenced by NeonLayerSupport::IsActivationSupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  const arm_compute::ActivationLayerInfo activationLayerInfo =
 25  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
 26 
27  return arm_compute::NEActivationLayer::validate(&aclInput,
28  &aclOutput,
29  activationLayerInfo);
30 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonAdditionWorkloadValidate()

arm_compute::Status NeonAdditionWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 20 of file NeonAdditionWorkload.cpp.

Referenced by NeonLayerSupport::IsAdditionSupported(), and NeonBackend::OptimizeSubgraphView().

24 {
25  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
26  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
27  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
28 
29  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
30  activationDescriptor);
31 
32  return arm_compute::NEArithmeticAddition::validate(&aclInput0,
33  &aclInput1,
34  &aclOutput,
35  arm_compute::ConvertPolicy::SATURATE,
36  activationInfo);
37 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonArgMinMaxWorkloadValidate()

arm_compute::Status NeonArgMinMaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const ArgMinMaxDescriptor descriptor 
)

Definition at line 31 of file NeonArgMinMaxWorkload.cpp.

Referenced by NeonLayerSupport::IsArgMinMaxSupported().

34 {
35  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
36  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
37 
38  auto numDims = input.GetNumDimensions();
39  auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, descriptor.m_Axis);
40  int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
41 
42  if (descriptor.m_Function == ArgMinMaxFunction::Max)
43  {
44  return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
45  arm_compute::ReductionOperation::ARG_IDX_MAX);
46  }
47  else
48  {
49  return arm_compute::NEArgMinMaxLayer::validate(&aclInput, aclAxis, &aclOutput,
50  arm_compute::ReductionOperation::ARG_IDX_MIN);
51  }
52 }
unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35

◆ NeonBackendId()

constexpr const char* armnn::NeonBackendId ( )

Definition at line 10 of file NeonBackendId.hpp.

Referenced by NeonBackend::GetIdStatic().

10 { return "CpuAcc"; }

◆ NeonBatchNormalizationValidate()

arm_compute::Status NeonBatchNormalizationValidate ( const TensorInfo input,
const TensorInfo output,
const TensorInfo mean,
const TensorInfo var,
const TensorInfo beta,
const TensorInfo gamma,
const BatchNormalizationDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonBatchNormalizationWorkload.cpp.

Referenced by NeonLayerSupport::IsBatchNormalizationSupported(), and NeonBackend::OptimizeSubgraphView().

32 {
33  const arm_compute::TensorInfo aclInputInfo =
34  armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
35  const arm_compute::TensorInfo aclOutputInfo =
36  armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
37  const arm_compute::TensorInfo aclMeanInfo =
38  armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
39  const arm_compute::TensorInfo aclVarInfo =
40  armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
41  const arm_compute::TensorInfo aclBetaInfo =
42  armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
43  const arm_compute::TensorInfo aclGammaInfo =
44  armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
45 
46  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
47  activationDescriptor);
48 
49  return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo,
50  &aclOutputInfo,
51  &aclMeanInfo,
52  &aclVarInfo,
53  &aclBetaInfo,
54  &aclGammaInfo,
55  descriptor.m_Eps,
56  activationInfo);
57 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonBatchToSpaceNdWorkloadValidate()

arm_compute::Status NeonBatchToSpaceNdWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const BatchToSpaceNdDescriptor descriptor 
)

Definition at line 20 of file NeonBatchToSpaceNdWorkload.cpp.

Referenced by NeonLayerSupport::IsBatchToSpaceNdSupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
25  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
26 
27  // ArmNN blockShape is [H, W] Cl asks for W, H
28  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
29  int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
30 
31  const arm_compute::Status aclStatus = arm_compute::NEBatchToSpaceLayer::validate(&aclInputInfo,
32  blockWidth,
33  blockHeight,
34  &aclOutputInfo);
35  return aclStatus;
36 }
Status
enumeration
Definition: Types.hpp:42
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
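
As the comment in the listing notes, the descriptor stores the block shape as [H, W] while the ACL layer takes width and height as separate arguments, so the two values are deliberately swapped. For example (values are arbitrary):

    // Hypothetical descriptor with a block of height 2 and width 4.
    armnn::BatchToSpaceNdDescriptor descriptor;
    descriptor.m_BlockShape = { 2, 4 };  // stored as [H, W]

    // Inside the validate function this becomes blockHeight = 2 and blockWidth = 4,
    // and NEBatchToSpaceLayer::validate() is called with (input, blockWidth, blockHeight, output).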

◆ NeonCastValidate()

arm_compute::Status NeonCastValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file NeonCastWorkload.cpp.

Referenced by NeonLayerSupport::IsCastSupported().

20 {
21  arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  return arm_compute::NECast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
25 }

◆ NeonChannelShuffleValidate()

arm_compute::Status NeonChannelShuffleValidate ( const TensorInfo input,
const TensorInfo output,
const ChannelShuffleDescriptor descriptor 
)

Definition at line 17 of file NeonChannelShuffleWorkload.cpp.

Referenced by NeonLayerSupport::IsChannelShuffleSupported().

20 {
21  arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  // In Arm NN and in NNAPI, channel shuffle implementation is datalayout agnostic and it has axis as a parameter.
25  // The channel shuffle Implementation for Neon is dependent on datalayout and does not have axis as a parameter,
26  // it only supports channel shuffle for 4D tensors in dimension C (1 or 3).
27  arm_compute::DataLayout aclDataLayout;
28  if (input.GetNumDimensions() == 4)
29  {
30  switch (descriptor.m_Axis)
31  {
32  case 1:
33  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
34  break;
35  case 3:
36  aclDataLayout = ConvertDataLayout(armnn::DataLayout::NHWC);
37  break;
38  default:
39  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported axis"};
40  }
41  aclInputInfo.set_data_layout(aclDataLayout);
42  aclOutputInfo.set_data_layout(aclDataLayout);
43  return arm_compute::NEChannelShuffleLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_NumGroups);
44  }
45  else
46  {
47  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported number of dimensions"};
48  }
49 }
DataLayout
Definition: Types.hpp:62
Status
enumeration
Definition: Types.hpp:42

◆ NeonComparisonWorkloadValidate()

arm_compute::Status NeonComparisonWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ComparisonDescriptor descriptor 
)

Definition at line 16 of file NeonComparisonWorkload.cpp.

Referenced by NeonLayerSupport::IsComparisonSupported().

20 {
21  const arm_compute::TensorInfo aclInput0 = BuildArmComputeTensorInfo(input0);
22  const arm_compute::TensorInfo aclInput1 = BuildArmComputeTensorInfo(input1);
23  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
24 
25  const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(descriptor);
26 
27  const arm_compute::Status aclStatus = arm_compute::NEElementwiseComparison::validate(&aclInput0,
28  &aclInput1,
29  &aclOutput,
30  comparisonOperation);
31  return aclStatus;
32 }
ComparisonOperation
Definition: Types.hpp:108
Status
enumeration
Definition: Types.hpp:42
arm_compute::ComparisonOperation ConvertComparisonOperationToAcl(const ComparisonDescriptor &descriptor)

◆ NeonConcatWorkloadValidate()

arm_compute::Status NeonConcatWorkloadValidate ( const std::vector< const TensorInfo *> &  inputs,
const TensorInfo output,
const OriginsDescriptor descriptor 
)

Definition at line 27 of file NeonConcatWorkload.cpp.

Referenced by NeonLayerSupport::IsConcatSupported().

31 {
32  std::vector<arm_compute::TensorInfo> aclInputs;
33  for (const TensorInfo* input : inputs)
34  {
35  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
36  aclInputs.emplace_back(aclInputInfo);
37  }
38  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
39  std::vector<const arm_compute::ITensorInfo*> aclInputPtrs;
40  for (arm_compute::ITensorInfo& input : aclInputs)
41  {
42  aclInputPtrs.emplace_back(&input);
43  }
44 
45  size_t aclAxis = CalcAxis(descriptor);
46  return arm_compute::NEConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
47 }

◆ NeonConstantWorkloadValidate()

arm_compute::Status NeonConstantWorkloadValidate ( const TensorInfo output)

Definition at line 20 of file NeonConstantWorkload.cpp.

Referenced by NeonLayerSupport::IsConstantSupported().

21 {
22  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
23 
24  std::array<arm_compute::DataType,9> supportedTypes = {
25  arm_compute::DataType::BFLOAT16,
26  arm_compute::DataType::F16,
27  arm_compute::DataType::F32,
28  arm_compute::DataType::QASYMM8,
29  arm_compute::DataType::QASYMM8_SIGNED,
30  arm_compute::DataType::QSYMM16,
31  arm_compute::DataType::QSYMM8,
32  arm_compute::DataType::QSYMM8_PER_CHANNEL,
33  arm_compute::DataType::S32
34  };
35  auto it = std::find(begin(supportedTypes), end(supportedTypes), neonOutputInfo.data_type());
36 
37  if (it != end(supportedTypes))
38  {
39  return arm_compute::Status{};
40  }
41  else
42  {
43  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported DataType"};
44  }
45 }
Status
enumeration
Definition: Types.hpp:42

◆ NeonConvolution2dWorkloadValidate()

arm_compute::Status NeonConvolution2dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Convolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonConvolution2dWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by NeonLayerSupport::IsConvolution2dSupported(), and NeonBackend::OptimizeSubgraphView().

31 {
32  // arm_compute::NEConvolutionLayer supports both const and non const
33  // weights. However, in the case of non const weights we'd have to call
34  // prepare or configure for each inference which we're not setup to do just yet.
35  if (!weights.IsConstant())
36  {
37  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
38  "ArmNN NeonConvolution2dWorkload does not support non constant weights."};
39  }
40 
41  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
42  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
43  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
44  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
45 
46  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
47  descriptor.m_DilationY);
48 
49  arm_compute::TensorInfo aclBiasesInfo;
50  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
51 
52  if (descriptor.m_BiasEnabled)
53  {
54  ARMNN_ASSERT(biases.has_value());
55  // Same for bias as weights. We don't currently support non const.
56  if (!biases.value().IsConstant())
57  {
58  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
59  "ArmNN NeonConvolution2dWorkload does not support non constant bias."};
60  }
61  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
62  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
63  optionalAclBiasesInfo = &aclBiasesInfo;
64  }
65 
66  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
67 
68  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
69  activationDescriptor);
70 
71  return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
72  &aclWeightsInfo,
73  optionalAclBiasesInfo,
74  &aclOutputInfo,
75  layerInfo,
76  arm_compute::WeightsInfo(),
77  aclDilationInfo,
78  activationInfo,
79  isFastMathEnabled);
80 }
Status
enumeration
Definition: Types.hpp:42
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonConvolution3dWorkloadValidate()

arm_compute::Status NeonConvolution3dWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const Convolution3dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
bool  isFastMathEnabled,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonConvolution3dWorkload.cpp.

Referenced by NeonLayerSupport::IsConvolution3dSupported().

31 {
32  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34  arm_compute::TensorInfo aclBiasesInfo;
35  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
36  if (descriptor.m_BiasEnabled)
37  {
38  ARMNN_ASSERT(biases.has_value());
39 
40  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
41  optionalAclBiasesInfo = &aclBiasesInfo;
42  }
43  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
44 
45  const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
46  isFastMathEnabled,
47  activationDescriptor);
48 
49  return arm_compute::NEConv3D::validate(&aclInputInfo,
50  &aclWeightsInfo,
51  optionalAclBiasesInfo,
52  &aclOutputInfo,
53  aclConv3DInfo);
54 }
arm_compute::Conv3dInfo ComputeConv3DInfo(const armnn::Convolution3dDescriptor descriptor, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
Utility function used to setup an arm_compute::Conv3dInfo object from convolution3d descriptor...
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ NeonDepthToSpaceWorkloadValidate()

arm_compute::Status NeonDepthToSpaceWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const DepthToSpaceDescriptor descriptor 
)

Definition at line 19 of file NeonDepthToSpaceWorkload.cpp.

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsDepthToSpaceSupported().

22 {
23  DataLayout dataLayout = descriptor.m_DataLayout;
24  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
25  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
26 
27  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
28 
29  return arm_compute::NEDepthToSpaceLayer::validate(&aclInput, &aclOutput, blockSize);
30 }
DataLayout
Definition: Types.hpp:62
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35

◆ NeonDepthwiseConvolutionWorkloadValidate()

arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const DepthwiseConvolution2dDescriptor descriptor,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
const ActivationDescriptor activationDescriptor 
)

Definition at line 29 of file NeonDepthwiseConvolutionWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by NeonLayerSupport::IsDepthwiseConvolutionSupported(), NeonLayerSupport::IsDilatedDepthwiseConvolutionSupported(), and NeonBackend::OptimizeSubgraphView().

35 {
36  // The Neon implemented workload does support both const and non const
37  // weights. However, in the case of non const weights we'd have to call
38  // prepare or configure for each inference which we're not setup to do just yet.
39  if (!weights.IsConstant())
40  {
41  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
42  "ArmNN NeonDepthwiseConv2dWorkload does not support non constant weights."};
43  }
44 
45  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
46  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
47 
48  // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
49  //
50  // ACL format for weights for depthwise is:
51  // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
52  // - [1, C, H, W] for [N, C, H, W] input/output layout
53  //
54  // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
55  // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
56  // so we do the permute here for the TensorInfo weights.
57  unsigned int aclDepthMultiplier;
58  TensorInfo weightsPermuted;
59  std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
60 
61  // Convert the weights into the compute library format
62  arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
63  aclWeightsInfo.set_are_values_constant(weights.IsConstant());
64 
65  arm_compute::TensorInfo aclBiasesInfo;
66  arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
67  if (descriptor.m_BiasEnabled)
68  {
69  ARMNN_ASSERT(biases.has_value());
70  // Same for bias as weights. We don't currently support non const.
71  if (!biases.value().IsConstant())
72  {
73  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
74  "ArmNN NeonDepthwiseConv2dWorkload does not support non constant bias."};
75  }
76  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
77  aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
78  optionalAclBiasesInfo = &aclBiasesInfo;
79  }
80 
81  arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
82  const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
83  descriptor.m_DilationX, descriptor.m_DilationY);
84 
85  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
86  activationDescriptor);
87 
88  return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
89  &aclWeightsInfo,
90  optionalAclBiasesInfo,
91  &aclOutputInfo,
92  aclPadStrideInfo,
93  aclDepthMultiplier,
94  activationInfo,
95  aclDilationInfo);
96 }
bool IsConstant() const
Definition: Tensor.cpp:509
bool m_BiasEnabled
Enable/disable bias.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
uint32_t m_DilationY
Dilation factor value for height dimension.
uint32_t m_DilationX
Dilation factor value for width dimension.
bool has_value() const noexcept
Definition: Optional.hpp:53
Status
enumeration
Definition: Types.hpp:42
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
std::tuple< TensorInfo, unsigned int > Convert1HWOTensorInfoToAcl(const TensorInfo &weightInfo, const TensorInfo &inputInfo, const DataLayout dataLayout)
Weights for depthwise have a datalayout of [1,H,W,O] = [1,H,W,I*M] This function converts a TensorInfo...
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonDequantizeWorkloadValidate()

arm_compute::Status NeonDequantizeWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 22 of file NeonDequantizeWorkload.cpp.

Referenced by NeonLayerSupport::IsDequantizeSupported().

24 {
25  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
27 
28  return arm_compute::NEDequantizationLayer::validate(&aclInput, &aclOutput);
29 }

◆ NeonDetected()

bool NeonDetected ( )

Definition at line 37 of file Utils.cpp.

38 {
39 #if !defined(ARMNN_BUILD_BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
40  auto hwcaps= getauxval(AT_HWCAP);
41 #endif
42 
43 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__aarch64__)
44 
45  if (hwcaps & HWCAP_ASIMD)
46  {
47  // On an arm64 device with Neon.
48  return true;
49  }
50  else
51  {
52  // On an arm64 device without Neon.
53  return false;
54  }
55 
56 #endif
57 #if !defined(ARMNN_BUILD_BARE_METAL) && defined(__arm__)
58 
59  if (hwcaps & HWCAP_NEON)
60  {
61  // On an armhf device with Neon.
62  return true;
63  }
64  else
65  {
66  // On an armhf device without Neon.
67  return false;
68  }
69 
70 #endif
71 
72  // This method of Neon detection is only supported on Linux so in order to prevent a false negative
73  // we will return true in cases where detection did not run.
74  return true;
75 }
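
A typical use is to fall back to the reference backend when Neon is not available on the target device; a minimal sketch (the id strings correspond to NeonBackendId() above and to the CPU reference backend):

    std::vector<armnn::BackendId> backends;

    if (armnn::NeonDetected())
    {
        backends.push_back(armnn::BackendId("CpuAcc")); // Neon-accelerated CPU backend
    }
    backends.push_back(armnn::BackendId("CpuRef"));     // portable reference fallback

    // 'backends' can then be passed as the backend preference list to armnn::Optimize().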

◆ NeonDetectionPostProcessValidate()

arm_compute::Status NeonDetectionPostProcessValidate ( const TensorInfo boxEncodings,
const TensorInfo scores,
const TensorInfo anchors,
const TensorInfo detectionBoxes,
const TensorInfo detectionClasses,
const TensorInfo detectionScores,
const TensorInfo numDetections,
const DetectionPostProcessDescriptor descriptor 
)

Definition at line 32 of file NeonDetectionPostProcessWorkload.cpp.

References info, and MakeInfo().

40 {
41  arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(descriptor);
42 
43  const arm_compute::TensorInfo aclBoxEncodings =
44  armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings);
45 
46  const arm_compute::TensorInfo aclScores =
47  armcomputetensorutils::BuildArmComputeTensorInfo(scores);
48 
49  const arm_compute::TensorInfo aclAnchors =
50  armcomputetensorutils::BuildArmComputeTensorInfo(anchors);
51 
52  arm_compute::TensorInfo aclDetectionBoxes =
53  armcomputetensorutils::BuildArmComputeTensorInfo(detectionBoxes);
54 
55  arm_compute::TensorInfo aclDetectionClasses =
56  armcomputetensorutils::BuildArmComputeTensorInfo(detectionClasses);
57 
58  arm_compute::TensorInfo aclDetectionScores =
59  armcomputetensorutils::BuildArmComputeTensorInfo(detectionScores);
60 
61  arm_compute::TensorInfo aclNumDetections =
62  armcomputetensorutils::BuildArmComputeTensorInfo(numDetections);
63 
64  return arm_compute::NEDetectionPostProcessLayer::validate(
65  &aclBoxEncodings,
66  &aclScores,
67  &aclAnchors,
68  &aclDetectionBoxes,
69  &aclDetectionClasses,
70  &aclDetectionScores,
71  &aclNumDetections,
72  info);
73 }
arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor &descriptor)

◆ NeonDivisionWorkloadValidate()

arm_compute::Status NeonDivisionWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output,
const ActivationDescriptor activationDescriptor 
)

Definition at line 18 of file NeonDivisionWorkload.cpp.

Referenced by NeonLayerSupport::IsDivisionSupported(), and NeonBackend::OptimizeSubgraphView().

22 {
23  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
24  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
25  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
26 
27  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
28  activationDescriptor);
29 
30  return arm_compute::NEElementwiseDivision::validate(&aclInput0,
31  &aclInput1,
32  &aclOutput,
33  activationInfo);
34 }
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)

◆ NeonExpWorkloadValidate()

arm_compute::Status NeonExpWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonExpWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEExpLayer::validate(&aclInput, &aclOutput);
23 }

◆ NeonFullyConnectedWorkloadValidate()

arm_compute::Status NeonFullyConnectedWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const TensorInfo weights,
const Optional< TensorInfo > &  biases,
const FullyConnectedDescriptor descriptor,
const ActivationDescriptor activationDescriptor 
)

Definition at line 24 of file NeonFullyConnectedWorkload.cpp.

References TensorInfo::IsConstant().

Referenced by NeonLayerSupport::IsFullyConnectedSupported(), and NeonBackend::OptimizeSubgraphView().

30 {
31  // The NEON implemented workload does support both const and non const
32  // weights. However, in the case of non const weights we'd have to call
33  // prepare or configure for each inference which we're not setup to do just yet.
34  if (!weights.IsConstant())
35  {
36  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
37  "Arm NN NeonFullyConnectedWorkload does not support non constant weights."};
38  }
39  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
40  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
41  arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
42  aclWeights.set_are_values_constant(weights.IsConstant());
43 
44  arm_compute::TensorInfo aclBiases;
45  arm_compute::TensorInfo* optionalAclBiases = nullptr;
46  if (descriptor.m_BiasEnabled)
47  {
48  ARMNN_ASSERT(biases.has_value());
49  // Same for bias as weights. We don't currently support non const.
50  if (!biases.value().IsConstant())
51  {
52  return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
53  "Arm NN NeonFullyConnectedWorkload does not support non constant bias."};
54  }
55  aclBiases = BuildArmComputeTensorInfo(biases.value());
56  aclBiases.set_are_values_constant(biases.value().IsConstant());
57  optionalAclBiases = &aclBiases;
58  }
59 
60  const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
61  ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
62  return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
63  &aclWeights,
64  optionalAclBiases,
65  &aclOutput,
66  fullyConnectedLayerInfo);
67 }
arm_compute::FullyConnectedLayerInfo ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor &fullyConnectedDesc, const ActivationDescriptor *activationDesc)
Status
enumeration
Definition: Types.hpp:42
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ NeonGatherNdWorkloadValidate()

arm_compute::Status NeonGatherNdWorkloadValidate ( const TensorInfo paramsInfo,
const TensorInfo indicesInfo,
const TensorInfo outputInfo 
)

Validates each stage of the GatherNd decomposition in turn (Mul, ReduceSum, Gather, Reshape) and returns OK only if all of the layers are valid.

Definition at line 14 of file NeonGatherNdWorkload.cpp.

References CalculateGatherNdKeyIndices(), and TensorInfo::SetShape().

Referenced by NeonLayerSupport::IsGatherNdSupported().

17 {
18  // Calculate ND, K, W, C.
19  std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
20 
21  /// Validate Mul
22  // Indices with shape { W, ND }
23  armnn::TensorInfo indices_W_ND_Info = indicesInfo;
24  indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
25  const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
26 
27  // Flattened coefficients with shape { ND }
28  armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
29  flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
30  const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);
31 
32  // Output of Mul with shape { W, ND }
33  const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);
34 
35  auto statusMul = arm_compute::NEPixelWiseMultiplication::validate(&aclIndicesInfo,
36  &aclFlattenedCoeffInfo,
37  &aclOutputMulInfo,
38  1.0f,
39  arm_compute::ConvertPolicy::WRAP,
40  arm_compute::RoundingPolicy::TO_ZERO,
41  arm_compute::ActivationLayerInfo());
42 
43  /// Validate ReduceSum
44  // Flattened indices with shape { W }
45  armnn::TensorInfo flattenedIndices_Info = indicesInfo;
46  flattenedIndices_Info.SetShape({ keyIndices["W"] });
47  const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);
48 
49  const std::vector<unsigned int> armnnReduceAxes(1, 1);
50  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
51  indices_W_ND_Info.GetNumDimensions(),
52  armnnReduceAxes);
53 
54  auto statusReduceSum = arm_compute::NEReductionOperation::validate(&aclOutputMulInfo,
55  &aclFlattenedIndicesInfo,
56  static_cast<unsigned int>(coords[0]),
57  arm_compute::ReductionOperation::SUM,
58  false);
59 
60  /// Validate Gather
61  // Params with shape { K, C }
62  armnn::TensorInfo params_K_C_Info = paramsInfo;
63  params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
64  const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);
65 
66  // Output of gather with shape { W, C }
67  armnn::TensorInfo outputGather_Info = outputInfo;
68  outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
69  const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);
70 
71  auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
72  auto statusGather =
73  arm_compute::NEGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);
74 
75  /// Validate Reshape
76  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);
77 
78  auto statusReshape = arm_compute::NEReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);
79 
80  /// Return OK if all the layers are valid
81  auto okCode = arm_compute::ErrorCode::OK;
82  if (statusMul.error_code() == okCode &&
83  statusReduceSum.error_code() == okCode &&
84  statusGather.error_code() == okCode &&
85  statusReshape.error_code() == okCode)
86  {
87  return arm_compute::Status(arm_compute::ErrorCode::OK,
88  "All GatherND layers validate status OK.");
89  }
90  else
91  {
92  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
93  "GatherND layer validate status failed.");
94  }
95 }
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank...
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1)
Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1) ...
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:193
Status
enumeration
Definition: Types.hpp:42
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:195
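
The decomposition is easiest to follow with a concrete shape example (hypothetical values, assuming the usual GatherNd semantics where the last dimension of the indices tensor selects leading dimensions of the params tensor):

    // params [5, 4, 3], indices [2, 2]  =>  ND = 2, W = 2, K = 5 * 4 = 20, C = 3
    //  - indices are viewed as { W, ND } = { 2, 2 } and flattened to { W } = { 2 } by the Mul + ReduceSum steps
    //  - params are viewed as { K, C } = { 20, 3 } and gathered into { W, C } = { 2, 3 }
    //  - the gather result is finally reshaped to the declared output shape [2, 3]
    armnn::TensorInfo params ({ 5, 4, 3 }, armnn::DataType::Float32);
    armnn::TensorInfo indices({ 2, 2 },    armnn::DataType::Signed32);
    armnn::TensorInfo output ({ 2, 3 },    armnn::DataType::Float32);

    arm_compute::Status status = armnn::NeonGatherNdWorkloadValidate(params, indices, output);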

◆ NeonGatherWorkloadValidate()

arm_compute::Status NeonGatherWorkloadValidate ( const TensorInfo input,
const TensorInfo indices,
const TensorInfo output,
const GatherDescriptor descriptor 
)

Definition at line 13 of file NeonGatherWorkload.cpp.

Referenced by NeonLayerSupport::IsGatherSupported().

17 {
18  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
19  const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(indices);
20  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
21 
22  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
23 
24  return arm_compute::NEGather::validate(&aclInput, &aclIndices, &aclOutput, aclAxis);
25 }
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank...

◆ NeonInstanceNormalizationWorkloadValidate()

arm_compute::Status NeonInstanceNormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const InstanceNormalizationDescriptor descriptor 
)

Definition at line 19 of file NeonInstanceNormalizationWorkload.cpp.

Referenced by NeonLayerSupport::IsInstanceNormalizationSupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  return arm_compute::NEInstanceNormalizationLayer::validate(&aclInputInfo,
27  &aclOutputInfo,
28  descriptor.m_Gamma,
29  descriptor.m_Beta,
30  descriptor.m_Eps);
31 }

◆ NeonL2NormalizationWorkloadValidate()

arm_compute::Status NeonL2NormalizationWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const L2NormalizationDescriptor descriptor 
)

Definition at line 19 of file NeonL2NormalizationFloatWorkload.cpp.

Referenced by NeonLayerSupport::IsL2NormalizationSupported().

22 {
23  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
24  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
25 
26  int axis = (descriptor.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
27 
28  return arm_compute::NEL2NormalizeLayer::validate(&aclInput, &aclOutput, axis, descriptor.m_Eps);
29 }

◆ NeonLogicalAndWorkloadValidate()

arm_compute::Status NeonLogicalAndWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 18 of file NeonLogicalAndWorkload.cpp.

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
23  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::NELogicalAnd::validate(&aclInputInfo0,
27  &aclInputInfo1,
28  &aclOutputInfo);
29  return aclStatus;
30 }
Status
enumeration
Definition: Types.hpp:42

◆ NeonLogicalNotWorkloadValidate()

arm_compute::Status NeonLogicalNotWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 19 of file NeonLogicalNotWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
24 
25  const arm_compute::Status aclStatus = arm_compute::NELogicalNot::validate(&aclInputInfo,
26  &aclOutputInfo);
27  return aclStatus;
28 }
Status
enumeration
Definition: Types.hpp:42

◆ NeonLogicalOrWorkloadValidate()

arm_compute::Status NeonLogicalOrWorkloadValidate ( const TensorInfo input0,
const TensorInfo input1,
const TensorInfo output 
)

Definition at line 18 of file NeonLogicalOrWorkload.cpp.

Referenced by NeonLayerSupport::IsLogicalBinarySupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo0 = BuildArmComputeTensorInfo(input0);
23  const arm_compute::TensorInfo aclInputInfo1 = BuildArmComputeTensorInfo(input1);
24  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
25 
26  const arm_compute::Status aclStatus = arm_compute::NELogicalOr::validate(&aclInputInfo0,
27  &aclInputInfo1,
28  &aclOutputInfo);
29  return aclStatus;
30 }
Status
enumeration
Definition: Types.hpp:42

◆ NeonLogSoftmaxWorkloadValidate()

arm_compute::Status NeonLogSoftmaxWorkloadValidate ( const TensorInfo input,
const TensorInfo output,
const LogSoftmaxDescriptor descriptor 
)

Definition at line 19 of file NeonLogSoftmaxWorkload.cpp.

Referenced by NeonLayerSupport::IsLogSoftmaxSupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
27  return arm_compute::NELogSoftmaxLayer::validate(&aclInputInfo,
28  &aclOutputInfo,
29  descriptor.m_Beta,
30  aclAxis);
31 }
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank...

◆ NeonLogWorkloadValidate()

arm_compute::Status NeonLogWorkloadValidate ( const TensorInfo input,
const TensorInfo output 
)

Definition at line 17 of file NeonLogWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NELogLayer::validate(&aclInput, &aclOutput);
23 }

◆ NeonLstmFloatWorkloadValidate()

arm_compute::Status NeonLstmFloatWorkloadValidate ( const TensorInfo input,
const TensorInfo outputStateIn,
const TensorInfo cellStateIn,
const TensorInfo scratchBuffer,
const TensorInfo outputStateOut,
const TensorInfo cellStateOut,
const TensorInfo output,
const LstmDescriptor descriptor,
const LstmInputParamsInfo paramsInfo 
)

Definition at line 253 of file NeonLstmFloatWorkload.cpp.

Referenced by NeonLayerSupport::IsLstmSupported().

262 {
263  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
264 
265  // The inputs and outputs
266  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
267  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
268  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
269  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
270  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
271  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
272  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
273 
274  // Basic parameters
275  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
276  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
277  const arm_compute::TensorInfo aclInputToCellWeightsInfo
278  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
279  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
280  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
281  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
282  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
283  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
284  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
285  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
286  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
287  const arm_compute::TensorInfo aclForgetGateBiasInfo
288  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
289  const arm_compute::TensorInfo aclCellBiasInfo
290  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
291  const arm_compute::TensorInfo aclOutputGateBiasInfo
292  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
293 
294  arm_compute::TensorInfo aclInputToInputWeightsInfo;
295  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
296  arm_compute::TensorInfo aclCellToInputWeightsInfo;
297  arm_compute::TensorInfo aclInputGateBiasInfo;
298  arm_compute::TensorInfo aclProjectionWeightsInfo;
299  arm_compute::TensorInfo aclProjectionBiasInfo;
300  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
301  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
302 
303  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
304  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
305  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
306  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
307 
308 
309  if (!descriptor.m_CifgEnabled)
310  {
311  if (descriptor.m_PeepholeEnabled)
312  {
313  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
314  }
315  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
316  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
317  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
318 
319  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
320  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
321  &aclInputGateBiasInfo);
322  }
323 
324  if (descriptor.m_ProjectionEnabled)
325  {
326  if (paramsInfo.m_ProjectionBias != nullptr)
327  {
328  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
329  }
330  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
331 
332  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
333  paramsInfo.m_ProjectionBias != nullptr ?
334  &aclProjectionBiasInfo : nullptr);
335  }
336 
337  if (descriptor.m_PeepholeEnabled)
338  {
339  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
340  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
341 
342  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
343  }
344 
345  if (descriptor.m_LayerNormEnabled)
346  {
347  if (!descriptor.m_CifgEnabled)
348  {
349  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
350  }
351  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
352  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
353  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
354 
355  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ?
356  nullptr : &aclInputLayerNormWeightsInfo,
357  &aclForgetLayerNormWeightsInfo,
358  &aclCellLayerNormWeightsInfo,
359  &aclOutputLayerNormWeightsInfo);
360  }
361 
362  float cell_threshold = descriptor.m_ClippingThresCell;
363  float projection_threshold = descriptor.m_ClippingThresProj;
364 
365  // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations
366  arm_compute::ActivationLayerInfo activationLayerInfo =
367  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
368 
369  return arm_compute::NELSTMLayer::validate(&aclInputInfo,
370  &aclInputToForgetWeightsInfo,
371  &aclInputToCellWeightsInfo,
372  &aclInputToOutputWeightsInfo,
373  &aclRecurrentToForgetWeightsInfo,
374  &aclRecurrentToCellWeightsInfo,
375  &aclRecurrentToOutputWeightsInfo,
376  &aclForgetGateBiasInfo,
377  &aclCellBiasInfo,
378  &aclOutputGateBiasInfo,
379  &aclOutputStateInInfo,
380  &aclCellStateInInfo,
381  &aclScratchBufferInfo,
382  &aclOutputStateOutInfo,
383  &aclCellStateOutInfo,
384  &aclOutputInfo,
385  lstm_params_info,
386  activationLayerInfo,
387  cell_threshold,
388  projection_threshold);
389 }

◆ NeonMaximumWorkloadValidate()

arm_compute::Status NeonMaximumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Definition at line 14 of file NeonMaximumWorkload.cpp.

Referenced by NeonLayerSupport::IsMaximumSupported().

17 {
18  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
19  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NEElementwiseMax::validate(&aclInput0,
23  &aclInput1,
24  &aclOutput);
25 }
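
A minimal usage sketch, not part of the ArmNN sources: the Neon validate helpers are typically given armnn::TensorInfo objects before a workload is created, and the returned arm_compute::Status is checked against arm_compute::ErrorCode::OK. The include paths below are assumptions; these are internal backend headers whose location depends on the build tree.

// Sketch only; header locations are assumed, not part of the public API.
#include <armnn/Tensor.hpp>
#include <neon/workloads/NeonMaximumWorkload.hpp>

bool IsElementwiseMaxSupportedOnNeon()
{
    const armnn::TensorInfo input0({2, 4}, armnn::DataType::Float32);
    const armnn::TensorInfo input1({2, 4}, armnn::DataType::Float32);
    const armnn::TensorInfo output({2, 4}, armnn::DataType::Float32);

    // The helper converts each armnn::TensorInfo to an arm_compute::TensorInfo
    // and defers to NEElementwiseMax::validate.
    const arm_compute::Status status = armnn::NeonMaximumWorkloadValidate(input0, input1, output);
    return status.error_code() == arm_compute::ErrorCode::OK;
}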

◆ NeonMeanWorkloadValidate()

arm_compute::Status NeonMeanWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const MeanDescriptor &  descriptor 
)

Definition at line 18 of file NeonMeanWorkload.cpp.

Referenced by NeonLayerSupport::IsMeanSupported().

21 {
22  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
23  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
26  input.GetNumDimensions(),
27  descriptor.m_Axis);
28 
29  return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
30 }
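
A sketch of how this validator might be exercised, assuming the includes from the NeonMaximumWorkloadValidate sketch above plus <armnn/Descriptors.hpp> and the corresponding workload header:

// Reduce over H and W of an NCHW tensor, keeping the reduced dimensions.
armnn::MeanDescriptor descriptor;
descriptor.m_Axis     = {2, 3};
descriptor.m_KeepDims = true;

const armnn::TensorInfo input({1, 3, 4, 4}, armnn::DataType::Float32);
const armnn::TensorInfo output({1, 3, 1, 1}, armnn::DataType::Float32);

// BuildArmComputeReductionCoordinates translates the ArmNN axes into the
// reversed ACL axis order before NEReduceMean::validate is called.
arm_compute::Status status = armnn::NeonMeanWorkloadValidate(input, output, descriptor);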

◆ NeonMinimumWorkloadValidate()

arm_compute::Status NeonMinimumWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output 
)

Validate function for validating the inputs and output.

Parameters
    [in]  input0   The input0 value to be validated.
    [in]  input1   The input1 value to be validated.
    [in]  output   The output value to be validated.

Definition at line 15 of file NeonMinimumWorkload.cpp.

Referenced by NeonLayerSupport::IsMinimumSupported().

18 {
19  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
20  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEElementwiseMin::validate(&aclInput0,
24  &aclInput1,
25  &aclOutput);
26 }

◆ NeonMultiplicationWorkloadValidate()

arm_compute::Status NeonMultiplicationWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 19 of file NeonMultiplicationWorkload.cpp.

Referenced by NeonLayerSupport::IsMultiplicationSupported(), and NeonBackend::OptimizeSubgraphView().

23 {
24  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
25  const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
26  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  auto convertPolicy = (IsQuantizedType(input0.GetDataType()) || IsQuantizedType(input1.GetDataType())) ?
29  arm_compute::ConvertPolicy::SATURATE :
30  arm_compute::ConvertPolicy::WRAP;
31 
32  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
33  activationDescriptor);
34 
35  // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
36  // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
37  // ignored for F32 tensors.
38  return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1,
39  &aclInput2,
40  &aclOutput,
41  1.0f,
42  convertPolicy,
43  arm_compute::RoundingPolicy::TO_ZERO,
44  activationInfo);
45 }
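
A sketch of validating a multiplication with a fused activation, assuming (as in the signature above) that the activation descriptor is passed by pointer and may be nullptr when nothing is fused:

armnn::ActivationDescriptor boundedRelu;
boundedRelu.m_Function = armnn::ActivationFunction::BoundedReLu;
boundedRelu.m_A = 6.0f;   // upper bound of the fused activation
boundedRelu.m_B = 0.0f;   // lower bound

const armnn::TensorInfo info({1, 8}, armnn::DataType::Float32);

// Pass nullptr instead of &boundedRelu when no activation is fused.
arm_compute::Status status = armnn::NeonMultiplicationWorkloadValidate(info, info, info, &boundedRelu);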

◆ NeonNegWorkloadValidate()

arm_compute::Status NeonNegWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonNegWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NENegLayer::validate(&aclInput, &aclOutput);
23 }

◆ NeonNormalizationWorkloadValidate()

arm_compute::Status NeonNormalizationWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const NormalizationDescriptor &  descriptor 
)

Definition at line 49 of file NeonNormalizationFloatWorkload.cpp.

Referenced by NeonLayerSupport::IsNormalizationSupported().

52 {
53  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
54  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
55 
56  arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
57 
58  return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
59 }

◆ NeonPadWorkloadValidate()

arm_compute::Status NeonPadWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PadDescriptor &  descriptor 
)

Definition at line 59 of file NeonPadWorkload.cpp.

Referenced by NeonLayerSupport::IsPadSupported().

62 {
63  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
64  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
65 
66  std::vector<std::pair<unsigned int, unsigned int>> reversed_PadList(descriptor.m_PadList.size());
67 
68  std::reverse_copy(std::begin(descriptor.m_PadList),
69  std::end(descriptor.m_PadList),
70  std::begin(reversed_PadList));
71 
72  arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(reversed_PadList);
73 
74  // PixelValue is currently unused when validating, but it's required to pass in PaddingMode.
75  arm_compute::PixelValue pixelValue = GetPixelValue(&aclInputInfo, descriptor.m_PadValue);
76  return arm_compute::NEPadLayer::validate(&aclInputInfo,
77  &aclOutputInfo,
78  padList,
79  pixelValue,
80  ConvertPaddingModeToAcl(descriptor.m_PaddingMode));
81 }
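
A sketch showing how an armnn::PadDescriptor maps onto this validator; the function reverse-copies m_PadList because ArmNN orders the {before, after} pairs from the outermost dimension while the ACL PaddingList expects the innermost first:

armnn::PadDescriptor descriptor;
descriptor.m_PadList  = {{0, 0}, {0, 0}, {1, 1}, {2, 2}};  // per-dimension padding, outermost first
descriptor.m_PadValue = 0.0f;

const armnn::TensorInfo input({1, 3, 8, 8}, armnn::DataType::Float32);
const armnn::TensorInfo output({1, 3, 10, 12}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonPadWorkloadValidate(input, output, descriptor);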

◆ NeonPermuteWorkloadValidate()

arm_compute::Status NeonPermuteWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const PermuteDescriptor &  descriptor 
)

Definition at line 15 of file NeonPermuteWorkload.cpp.

Referenced by NeonLayerSupport::IsPermuteSupported().

18 {
19  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
22 
23  return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
24  armcomputetensorutils::BuildArmComputePermutationVector(mappings));
25 }
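
A sketch of a permute validation. Element i of the armnn::PermutationVector gives the destination dimension of source dimension i, so {0, 3, 1, 2} maps NCHW data to NHWC:

armnn::PermuteDescriptor descriptor(armnn::PermutationVector({0, 3, 1, 2}));

const armnn::TensorInfo input({1, 3, 8, 8}, armnn::DataType::Float32);   // NCHW
const armnn::TensorInfo output({1, 8, 8, 3}, armnn::DataType::Float32);  // NHWC

arm_compute::Status status = armnn::NeonPermuteWorkloadValidate(input, output, descriptor);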

◆ NeonPooling2dWorkloadValidate()

arm_compute::Status NeonPooling2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling2dDescriptor &  descriptor 
)

Definition at line 22 of file NeonPooling2dWorkload.cpp.

Referenced by NeonLayerSupport::IsPooling2dSupported().

25 {
26  const arm_compute::TensorInfo aclInputInfo =
27  BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
28  const arm_compute::TensorInfo aclOutputInfo =
29  BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
30 
31  arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
32 
33  return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
34 }

◆ NeonPooling3dWorkloadValidate()

arm_compute::Status NeonPooling3dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const Pooling3dDescriptor &  descriptor 
)

Definition at line 15 of file NeonPooling3dWorkload.cpp.

Referenced by NeonLayerSupport::IsPooling3dSupported().

18  {
19  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
20  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
21  arm_compute::Pooling3dLayerInfo layerInfo = BuildArmComputePooling3dLayerInfo(descriptor);
22  return arm_compute::NEPooling3dLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
23  }

◆ NeonPreluWorkloadValidate()

arm_compute::Status NeonPreluWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  alpha,
const TensorInfo &  output 
)

Definition at line 17 of file NeonPreluWorkload.cpp.

Referenced by NeonLayerSupport::IsPreluSupported().

20 {
21  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
22  const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
23  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
24 
25  return arm_compute::NEPReluLayer::validate(&aclInput,
26  &aclAlpha,
27  &aclOutput);
28 }

◆ NeonQLstmWorkloadValidate()

arm_compute::Status NeonQLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateOut,
const TensorInfo &  outputStateOut,
const TensorInfo &  output,
const QLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 243 of file NeonQLstmWorkload.cpp.

Referenced by NeonLayerSupport::IsQLstmSupported().

251 {
252  arm_compute::LSTMParams<arm_compute::ITensorInfo> aclParamsInfo;
253 
254  // Input/Output tensor info
255  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
256  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
257  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
258 
259  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
260  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
261  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
262 
263  // Mandatory tensor info
264  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
265  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
266  const arm_compute::TensorInfo aclInputToCellWeightsInfo
267  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
268  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
269  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
270  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
271  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
272  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
273  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
274  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
275  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
276  const arm_compute::TensorInfo aclForgetGateBiasInfo
277  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
278  const arm_compute::TensorInfo aclCellBiasInfo
279  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
280  const arm_compute::TensorInfo aclOutputGateBiasInfo
281  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
282 
283  // Optional tensor info
284  arm_compute::TensorInfo aclInputToInputWeightsInfo;
285  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
286 
287  arm_compute::TensorInfo aclCellToInputWeightsInfo;
288  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
289  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
290 
291  arm_compute::TensorInfo aclInputGateBiasInfo;
292 
293  arm_compute::TensorInfo aclProjectionWeightsInfo;
294  arm_compute::TensorInfo aclProjectionBiasInfo;
295 
296  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
297  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
298  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
299  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
300 
301  // Create tensor info for optional params if they are enabled
302  if (descriptor.m_PeepholeEnabled)
303  {
304  if (!descriptor.m_CifgEnabled)
305  {
306  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
307  }
308 
309  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
310  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
311 
312  // Set peephole params info
313  aclParamsInfo.set_peephole_params(&aclCellToForgetWeightsInfo,
314  &aclCellToOutputWeightsInfo);
315  }
316 
317  if (descriptor.m_ProjectionEnabled)
318  {
319  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
320 
321  if (paramsInfo.m_ProjectionBias != nullptr)
322  {
323  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
324  }
325 
326  // Set projection params info
327  aclParamsInfo.set_projection_params(
328  &aclProjectionWeightsInfo,
329  paramsInfo.m_ProjectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
330  }
331 
332  if (descriptor.m_LayerNormEnabled)
333  {
334  if (!descriptor.m_CifgEnabled)
335  {
336  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
337  }
338 
339  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
340  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
341  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
342 
343  // Set layer norm params info
344  aclParamsInfo.set_layer_normalization_params(
345  paramsInfo.m_InputLayerNormWeights != nullptr ? &aclInputLayerNormWeightsInfo : nullptr,
346  &aclForgetLayerNormWeightsInfo,
347  &aclCellLayerNormWeightsInfo,
348  &aclOutputLayerNormWeightsInfo);
349  }
350 
351  if (!descriptor.m_CifgEnabled)
352  {
353  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
354  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
355  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
356 
357  // Set CIFG params info
358  aclParamsInfo.set_cifg_params(
359  &aclInputToInputWeightsInfo,
360  &aclRecurrentToInputWeightsInfo,
361  paramsInfo.m_CellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
362  &aclInputGateBiasInfo);
363  }
364 
365  // Set scalar descriptor params
366  aclParamsInfo.set_cell_clip_params(descriptor.m_CellClip);
367  aclParamsInfo.set_projection_clip_params(descriptor.m_ProjectionClip);
368  aclParamsInfo.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
369  aclParamsInfo.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
370  descriptor.m_ForgetIntermediateScale,
371  descriptor.m_CellIntermediateScale,
372  descriptor.m_OutputIntermediateScale);
373 
374  // QLSTM NEON validate
375  return arm_compute::NEQLSTMLayer::validate(&aclInputInfo,
376  &aclInputToForgetWeightsInfo,
377  &aclInputToCellWeightsInfo,
378  &aclInputToOutputWeightsInfo,
379  &aclRecurrentToForgetWeightsInfo,
380  &aclRecurrentToCellWeightsInfo,
381  &aclRecurrentToOutputWeightsInfo,
382  &aclForgetGateBiasInfo,
383  &aclCellBiasInfo,
384  &aclOutputGateBiasInfo,
385  &aclCellStateInInfo,
386  &aclOutputStateInInfo,
387  &aclCellStateOutInfo,
388  &aclOutputStateOutInfo,
389  &aclOutputInfo,
390  aclParamsInfo);
391 }

◆ NeonQuantizedLstmWorkloadValidate()

arm_compute::Status NeonQuantizedLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateOut,
const TensorInfo &  outputStateOut,
const QuantizedLstmInputParamsInfo &  paramsInfo 
)

Definition at line 131 of file NeonQuantizedLstmWorkload.cpp.

Referenced by NeonLayerSupport::IsQuantizedLstmSupported().

137 {
138  // The inputs and outputs
139  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
140  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
141  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
142  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
143  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
144 
145  // Basic parameters
146  const arm_compute::TensorInfo aclInputToInputWeightsInfo
147  = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
148  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
149  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
150  const arm_compute::TensorInfo aclInputToCellWeightsInfo
151  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
152  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
153  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
154 
155  const arm_compute::TensorInfo aclRecurrentToInputWeightsInfo
156  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
157  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
158  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
159  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
160  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
161  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
162  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
163 
164  const arm_compute::TensorInfo aclInputGateBiasInfo
165  = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
166  const arm_compute::TensorInfo aclForgetGateBiasInfo
167  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
168  const arm_compute::TensorInfo aclCellBiasInfo
169  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
170  const arm_compute::TensorInfo aclOutputGateBiasInfo
171  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
172 
173  return arm_compute::NELSTMLayerQuantized::validate(&aclInputInfo,
174  &aclInputToInputWeightsInfo,
175  &aclInputToForgetWeightsInfo,
176  &aclInputToCellWeightsInfo,
177  &aclInputToOutputWeightsInfo,
178  &aclRecurrentToInputWeightsInfo,
179  &aclRecurrentToForgetWeightsInfo,
180  &aclRecurrentToCellWeightsInfo,
181  &aclRecurrentToOutputWeightsInfo,
182  &aclInputGateBiasInfo,
183  &aclForgetGateBiasInfo,
184  &aclCellBiasInfo,
185  &aclOutputGateBiasInfo,
186  &aclCellStateInInfo,
187  &aclOutputStateInInfo,
188  &aclCellStateOutInfo,
189  &aclOutputStateOutInfo);
190 }

◆ NeonQuantizeWorkloadValidate()

arm_compute::Status NeonQuantizeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file NeonQuantizeWorkload.cpp.

Referenced by NeonLayerSupport::IsQuantizeSupported().

19 {
20  const arm_compute::TensorInfo neonInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo neonOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEQuantizationLayer::validate(&neonInputInfo, &neonOutputInfo);
24 }

◆ NeonReduceWorkloadValidate()

arm_compute::Status NeonReduceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ReduceDescriptor &  descriptor 
)

Definition at line 19 of file NeonReduceWorkload.cpp.

References ReduceDescriptor::m_vAxis.

Referenced by NeonLayerSupport::IsReduceSupported().

22 {
23  if ( descriptor.m_vAxis.size()==1 || descriptor.m_vAxis.empty())
24  {
25  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
29  input.GetNumDimensions(),
30  descriptor.m_vAxis);
31 
32  return arm_compute::NEReductionOperation::validate(&aclInputInfo,
33  &aclOutputInfo,
34  static_cast<unsigned int>(coords[0]),
35  ConvertReductionOperationToAcl(descriptor),
36  descriptor.m_KeepDims);
37  }
38  else
39  {
40  // Validate layer if there are multiple axes.
41  arm_compute::Status status;
42  IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, descriptor, status);
43  return status;
44  }
45 }
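
A sketch for the single-axis path; with more than one axis the function re-validates itself per axis through the IS_MULTI_AXES_REDUCE_SUPPORTED macro instead of calling NEReductionOperation directly:

armnn::ReduceDescriptor descriptor;
descriptor.m_vAxis           = {1};    // single reduction axis
descriptor.m_KeepDims        = false;
descriptor.m_ReduceOperation = armnn::ReduceOperation::Sum;

const armnn::TensorInfo input({2, 3, 4}, armnn::DataType::Float32);
const armnn::TensorInfo output({2, 4}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonReduceWorkloadValidate(input, output, descriptor);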

◆ NeonReshapeWorkloadValidate()

arm_compute::Status NeonReshapeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonReshapeWorkload.cpp.

Referenced by NeonLayerSupport::IsReshapeSupported().

19 {
20  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NEReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
24 }

◆ NeonResizeWorkloadValidate()

arm_compute::Status NeonResizeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const ResizeDescriptor &  descriptor 
)

Definition at line 22 of file NeonResizeWorkload.cpp.

Referenced by NeonLayerSupport::IsResizeSupported().

25 {
26  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
27  arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
28 
29  arm_compute::DataLayout aclDataLayout = ConvertDataLayout(descriptor.m_DataLayout);
30  aclInputInfo.set_data_layout(aclDataLayout);
31  aclOutputInfo.set_data_layout(aclDataLayout);
32 
33  arm_compute::InterpolationPolicy aclInterpolationPolicy =
34  ConvertResizeMethodToAclInterpolationPolicy(descriptor.m_Method);
35 
36  arm_compute::SamplingPolicy samplingPolicy = descriptor.m_HalfPixelCenters ? arm_compute::SamplingPolicy::CENTER :
37  arm_compute::SamplingPolicy::TOP_LEFT;
38 
39  bool usePadding = false;
40 
41  return arm_compute::NEScale::validate(&aclInputInfo,
42  &aclOutputInfo,
43  arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
44  arm_compute::BorderMode::REPLICATE,
45  arm_compute::PixelValue(0.f),
46  samplingPolicy,
47  usePadding,
48  descriptor.m_AlignCorners));
49 
50 }
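
A sketch of a bilinear upscale validation; m_HalfPixelCenters selects SamplingPolicy::CENTER in the code above, otherwise TOP_LEFT is used:

armnn::ResizeDescriptor descriptor;
descriptor.m_Method           = armnn::ResizeMethod::Bilinear;
descriptor.m_TargetHeight     = 16;
descriptor.m_TargetWidth      = 16;
descriptor.m_DataLayout       = armnn::DataLayout::NHWC;
descriptor.m_HalfPixelCenters = true;

const armnn::TensorInfo input({1, 8, 8, 3}, armnn::DataType::Float32);
const armnn::TensorInfo output({1, 16, 16, 3}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonResizeWorkloadValidate(input, output, descriptor);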

◆ NeonRsqrtWorkloadValidate()

arm_compute::Status NeonRsqrtWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file NeonRsqrtWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  return arm_compute::NERsqrtLayer::validate(&aclInput, &aclOutput);
24 }

◆ NeonSinWorkloadValidate()

arm_compute::Status NeonSinWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 17 of file NeonSinWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

18 {
19  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21 
22  return arm_compute::NESinLayer::validate(&aclInput, &aclOutput);
23 }

◆ NeonSliceWorkloadValidate()

arm_compute::Status NeonSliceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SliceDescriptor &  descriptor 
)

Definition at line 21 of file NeonSliceWorkload.cpp.

Referenced by NeonLayerSupport::IsSliceSupported().

24 {
25  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
26  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
27 
28  arm_compute::Coordinates starts;
29  arm_compute::Coordinates ends;
30 
31  std::tie(starts, ends) = SetNeonSliceData(descriptor.m_Begin, descriptor.m_Size);
32 
33  return arm_compute::NESlice::validate(&aclInputInfo, &aclOutputInfo, starts, ends);
34 }

◆ NeonSoftmaxWorkloadValidate()

arm_compute::Status NeonSoftmaxWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SoftmaxDescriptor &  descriptor 
)

Definition at line 19 of file NeonSoftmaxWorkload.cpp.

Referenced by NeonLayerSupport::IsSoftmaxSupported().

22 {
23  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  int aclAxis = ComputeAclAxis(descriptor.m_Axis, input);
27  return arm_compute::NESoftmaxLayer::validate(&aclInputInfo,
28  &aclOutputInfo,
29  descriptor.m_Beta,
30  aclAxis);
31 }
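
A sketch; ComputeAclAxis converts the ArmNN axis (counted left to right, negative values allowed) to the ACL convention before NESoftmaxLayer::validate is called:

armnn::SoftmaxDescriptor descriptor;
descriptor.m_Beta = 1.0f;
descriptor.m_Axis = -1;   // innermost dimension

const armnn::TensorInfo input({2, 10}, armnn::DataType::Float32);
const armnn::TensorInfo output({2, 10}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonSoftmaxWorkloadValidate(input, output, descriptor);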

◆ NeonSpaceToBatchNdWorkloadValidate()

arm_compute::Status NeonSpaceToBatchNdWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SpaceToBatchNdDescriptor &  descriptor 
)

Definition at line 20 of file NeonSpaceToBatchNdWorkload.cpp.

Referenced by NeonLayerSupport::IsSpaceToBatchNdSupported().

23 {
24  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
25  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
26 
27  // ArmNN blockShape is [H, W] Cl asks for W, H
28  int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
29  int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
30 
31  arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(
32  descriptor.m_PadList[1].first, descriptor.m_PadList[0].first);
33  arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(
34  descriptor.m_PadList[1].second, descriptor.m_PadList[0].second);
35 
36  return arm_compute::NESpaceToBatchLayer::validate(&aclInputInfo,
37  blockWidth,
38  blockHeight,
39  paddingLeftTop,
40  paddingRightBottom,
41  &aclOutputInfo);
42 }
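
A sketch; as the comment in the code notes, ArmNN stores the block shape as [H, W] while ACL takes width first, and this validator performs that swap:

armnn::SpaceToBatchNdDescriptor descriptor;
descriptor.m_BlockShape = {2, 2};            // [blockHeight, blockWidth]
descriptor.m_PadList    = {{0, 0}, {0, 0}};  // {before, after} padding for H and W
descriptor.m_DataLayout = armnn::DataLayout::NHWC;

const armnn::TensorInfo input({1, 4, 4, 1}, armnn::DataType::Float32);
const armnn::TensorInfo output({4, 2, 2, 1}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonSpaceToBatchNdWorkloadValidate(input, output, descriptor);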

◆ NeonSpaceToDepthWorkloadValidate()

arm_compute::Status NeonSpaceToDepthWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const SpaceToDepthDescriptor &  descriptor 
)

Definition at line 19 of file NeonSpaceToDepthWorkload.cpp.

References SpaceToDepthDescriptor::m_DataLayout.

Referenced by NeonLayerSupport::IsSpaceToDepthSupported().

22 {
23  DataLayout dataLayout = descriptor.m_DataLayout;
24  const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
25  const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
26 
27  int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
28 
29  return arm_compute::NESpaceToDepthLayer::validate(&aclInput, &aclOutput, blockSize);
30 }

◆ NeonSplitterWorkloadValidate()

arm_compute::Status NeonSplitterWorkloadValidate ( const TensorInfo &  input,
const std::vector< std::reference_wrapper< TensorInfo >> &  outputs,
unsigned int  splitAxis 
)

Definition at line 32 of file NeonSplitterWorkload.cpp.

Referenced by NeonLayerSupport::IsSplitterSupported().

35 {
36  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
37 
38  size_t numOutputs = outputs.size();
39 
40  std::vector<arm_compute::TensorInfo> aclOutputs;
41  aclOutputs.reserve(numOutputs);
42 
43  std::vector<arm_compute::ITensorInfo*> aclOutputPtr;
44  aclOutputPtr.reserve(numOutputs);
45 
46  for (size_t i = 0u; i < outputs.size(); ++i)
47  {
48  aclOutputs.emplace_back(BuildArmComputeTensorInfo(outputs[i]));
49  aclOutputPtr.emplace_back(&aclOutputs.back());
50  }
51 
52  unsigned int aclAxis = CalcAclAxis(input.GetNumDimensions(), splitAxis);
53  return arm_compute::NESplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
54 }
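
A sketch of splitting a 2D tensor into two equal outputs along axis 0; the outputs are passed as non-const references to match the std::reference_wrapper signature (assumes <vector> and <functional> in addition to the includes of the first sketch):

const armnn::TensorInfo input({4, 8}, armnn::DataType::Float32);
armnn::TensorInfo output0({2, 8}, armnn::DataType::Float32);
armnn::TensorInfo output1({2, 8}, armnn::DataType::Float32);

std::vector<std::reference_wrapper<armnn::TensorInfo>> outputs = {output0, output1};

// splitAxis 0 is converted to an ACL axis by CalcAclAxis before NESplit::validate runs.
arm_compute::Status status = armnn::NeonSplitterWorkloadValidate(input, outputs, 0);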

◆ NeonSqrtWorkloadValidate()

arm_compute::Status NeonSqrtWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output 
)

Definition at line 18 of file NeonSqrtWorkload.cpp.

Referenced by NeonLayerSupport::IsElementwiseUnarySupported().

19 {
20  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
21  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
22 
23  ActivationDescriptor descriptor;
24  descriptor.m_Function = ActivationFunction::Sqrt;
25  const arm_compute::ActivationLayerInfo activationLayerInfo =
26  ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
27 
28  return arm_compute::NEActivationLayer::validate(&aclInput, &aclOutput, activationLayerInfo);
29 }

◆ NeonStackWorkloadValidate()

arm_compute::Status NeonStackWorkloadValidate ( const std::vector< const TensorInfo *> &  inputs,
const TensorInfo &  output,
const StackDescriptor &  descriptor 
)

Definition at line 27 of file NeonStackWorkload.cpp.

Referenced by NeonLayerSupport::IsStackSupported().

30 {
31  std::vector<arm_compute::TensorInfo> aclInputs;
32  for (const TensorInfo* input : inputs)
33  {
34  arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
35  aclInputs.emplace_back(aclInputInfo);
36  }
37 
38  std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
39  for (arm_compute::ITensorInfo& input : aclInputs)
40  {
41  aclInputPtrs.emplace_back(&input);
42  }
43 
44  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
45  int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
46  return arm_compute::NEStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
47 }

◆ NeonStridedSliceWorkloadValidate()

arm_compute::Status NeonStridedSliceWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const StridedSliceDescriptor &  descriptor 
)

Definition at line 19 of file NeonStridedSliceWorkload.cpp.

Referenced by NeonLayerSupport::IsStridedSliceSupported().

22 {
23  const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
24  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
25 
26  arm_compute::Coordinates starts;
27  arm_compute::Coordinates ends;
28  arm_compute::Coordinates strides;
29 
30  std::tie(starts, ends, strides) = SetNeonStridedSliceData(descriptor.m_Begin,
31  descriptor.m_End,
32  descriptor.m_Stride);
33 
34  auto numDimensions = armnn::numeric_cast<int>(input.GetNumDimensions());
35  int32_t begin_mask = ConvertMaskToACLFormat(descriptor.m_BeginMask, numDimensions);
36  int32_t end_mask = ConvertMaskToACLFormat(descriptor.m_EndMask, numDimensions);
37  int32_t shrink_axis_mask = ConvertMaskToACLFormat(descriptor.m_ShrinkAxisMask, numDimensions);
38 
39  return arm_compute::NEStridedSlice::validate(&aclInput,
40  &aclOutput,
41  starts,
42  ends,
43  strides,
44  begin_mask,
45  end_mask,
46  shrink_axis_mask);
47 }
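
A sketch of a strided-slice validation; the begin/end/stride vectors use the signed ArmNN convention and are converted by SetNeonStridedSliceData, while the masks are re-packed by ConvertMaskToACLFormat:

armnn::StridedSliceDescriptor descriptor;
descriptor.m_Begin          = {0, 0};
descriptor.m_End            = {2, 3};
descriptor.m_Stride         = {1, 1};
descriptor.m_BeginMask      = 0;
descriptor.m_EndMask        = 0;
descriptor.m_ShrinkAxisMask = 0;

const armnn::TensorInfo input({4, 6}, armnn::DataType::Float32);
const armnn::TensorInfo output({2, 3}, armnn::DataType::Float32);

arm_compute::Status status = armnn::NeonStridedSliceWorkloadValidate(input, output, descriptor);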

◆ NeonSubtractionWorkloadValidate()

arm_compute::Status NeonSubtractionWorkloadValidate ( const TensorInfo &  input0,
const TensorInfo &  input1,
const TensorInfo &  output,
const ActivationDescriptor *  activationDescriptor 
)

Definition at line 22 of file NeonSubtractionWorkload.cpp.

Referenced by NeonLayerSupport::IsSubtractionSupported(), and NeonBackend::OptimizeSubgraphView().

26 {
27  const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
28  const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
29  const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
30 
31  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
32  activationDescriptor);
33 
34  return arm_compute::NEArithmeticSubtraction::validate(&aclInput0,
35  &aclInput1,
36  &aclOutput,
37  arm_compute::ConvertPolicy::SATURATE,
38  activationInfo);
39 }

◆ NeonTensorHandleFactoryId()

constexpr const char* armnn::NeonTensorHandleFactoryId ( )

Definition at line 14 of file NeonTensorHandleFactory.hpp.

Referenced by NeonTensorHandleFactory::GetIdStatic().

14 { return "Arm/Neon/TensorHandleFactory"; }

◆ NeonTransposeConvolution2dWorkloadValidate()

arm_compute::Status NeonTransposeConvolution2dWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TransposeConvolution2dDescriptor &  descriptor,
const TensorInfo &  weights,
const Optional< TensorInfo > &  biases 
)

Definition at line 25 of file NeonTransposeConvolution2dWorkload.cpp.

Referenced by NeonLayerSupport::IsTransposeConvolution2dSupported().

30 {
31  const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32  const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33  const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
34 
35  arm_compute::TensorInfo aclBiasesInfo;
36  arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
37 
38  if (descriptor.m_BiasEnabled)
39  {
40  ARMNN_ASSERT(biases.has_value());
41 
42  aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
43  optionalAclBiasesInfo = &aclBiasesInfo;
44  }
45 
46  arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
47 
48  return arm_compute::NEDeconvolutionLayer::validate(&aclInputInfo,
49  &aclWeightsInfo,
50  optionalAclBiasesInfo,
51  &aclOutputInfo,
52  layerInfo);
53 }

◆ NeonTransposeWorkloadValidate()

arm_compute::Status NeonTransposeWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  output,
const TransposeDescriptor &  descriptor 
)

Definition at line 15 of file NeonTransposeWorkload.cpp.

Referenced by NeonLayerSupport::IsTransposeSupported().

18 {
19  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
20  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
21  const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
22 
23  return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
24  armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
25 }

◆ NeonUnidirectionalSequenceLstmFloatWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmFloatWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const UnidirectionalSequenceLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 510 of file NeonUnidirectionalSequenceLstmFloatWorkload.cpp.

References TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

518 {
519  TensorShape inputLayerShape = input.GetShape();
520  TensorShape outputLayerShape = outputStateIn.GetShape();
521 
522  unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
523  unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
524  unsigned int inputSize = inputLayerShape[2];
525  unsigned int outputSize = outputLayerShape[2];
526 
527  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
528  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
529 
530  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
531  "Permute1 status");
532  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
533  "Split status");
534  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
535  "LSTM status");
536  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
537  "Concat status");
538  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
539  "Permute2 status");
540 
541  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
542  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
543 
544  //
545  // Permute validate
546  //
547  TensorInfo permuteOutInfo = TensorInfo(input);
548  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
549  if (!descriptor.m_TimeMajor)
550  {
551  statusPermute1 = arm_compute::NEPermute::validate(&aclInputInfo,
552  &aclPermuteOutInfo,
553  arm_compute::PermutationVector(0U, 2U, 1U));
554  }
555 
556  //
557  // Split and Concat Tensors validate
558  //
559  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
560  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
561  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
562  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
563  splitterOutputsTensorInfos.reserve(maxTime);
564  concatInputsTensorInfos.reserve(maxTime);
565  for (unsigned int i = 0; i < maxTime; ++i)
566  {
567  arm_compute::TensorInfo splitter_out;
568  arm_compute::TensorInfo concat_in;
569 
570  auto splitterTensorInfo = TensorInfo(input);
571  auto concatTensorInfo = TensorInfo(output);
572  splitterTensorInfo.SetShape({batchSize, inputSize});
573  concatTensorInfo.SetShape({batchSize, outputSize});
574 
575  arm_compute::TensorInfo aclSplitterTensorInfo
576  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
577  arm_compute::TensorInfo aclConcatTensorInfo
578  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
579 
580  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
581  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
582  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
583  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
584  }
585 
586  //
587  // Split validate
588  //
589  unsigned int numberDimensions = 3;
590  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
591  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
592 
593  if (maxTime != 1) // ACL split does not work with only one element to split.
594  {
595  if (!descriptor.m_TimeMajor)
596  {
597  statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
598  splitterOutputsTensorInfosPtr,
599  aclAxisSplit);
600  } else
601  {
602  statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
603  }
604  }
605 
606  //
607  // LSTM validate
608  //
609 
610  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
611 
612  const TensorInfo& scratchBuffer = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
613 
614  // The inputs and outputs
615  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
616  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
617  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
618  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
619  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
620 
621  // Basic parameters
622  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
623  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
624  const arm_compute::TensorInfo aclInputToCellWeightsInfo
625  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
626  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
627  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
628  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
629  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
630  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
631  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
632  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
633  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
634  const arm_compute::TensorInfo aclForgetGateBiasInfo
635  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
636  const arm_compute::TensorInfo aclCellBiasInfo
637  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
638  const arm_compute::TensorInfo aclOutputGateBiasInfo
639  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
640 
641  arm_compute::TensorInfo aclInputToInputWeightsInfo;
642  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
643  arm_compute::TensorInfo aclCellToInputWeightsInfo;
644  arm_compute::TensorInfo aclInputGateBiasInfo;
645  arm_compute::TensorInfo aclProjectionWeightsInfo;
646  arm_compute::TensorInfo aclProjectionBiasInfo;
647  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
648  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
649 
650  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
651  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
652  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
653  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
654 
655 
656  if (!descriptor.m_CifgEnabled)
657  {
658  if (descriptor.m_PeepholeEnabled)
659  {
660  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
661  }
662  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
663  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
664  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
665 
666  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
667  &aclRecurrentToInputWeightsInfo,
668  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
669  &aclInputGateBiasInfo);
670  }
671 
672  if (descriptor.m_ProjectionEnabled)
673  {
674  if (paramsInfo.m_ProjectionBias != nullptr)
675  {
676  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
677  }
678  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
679 
680  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
681  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
682  }
683 
684  if (descriptor.m_PeepholeEnabled)
685  {
686  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
687  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
688 
689  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
690  }
691 
692  if (descriptor.m_LayerNormEnabled)
693  {
694  if (!descriptor.m_CifgEnabled)
695  {
696  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
697  }
698  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
699  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
700  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
701 
702  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
703  &aclInputLayerNormWeightsInfo,
704  &aclForgetLayerNormWeightsInfo,
705  &aclCellLayerNormWeightsInfo,
706  &aclOutputLayerNormWeightsInfo);
707  }
708 
709  // Need to be set at negative threshold to be compatible for ACL
710  float cell_threshold = descriptor.m_ClippingThresCell;
711  float projection_threshold = descriptor.m_ClippingThresProj;
712 
713  arm_compute::ActivationLayerInfo activationLayerInfo =
714  ConvertLstmActivationFuncToAclLayerInfo(descriptor.m_ActivationFunc);
715 
716  for (unsigned int i = 0; i != maxTime; ++i)
717  {
718 
719  // Set LSTM input and output ITensors depending on:
720  // input format (timeMajor) & number of LSTM batches (maxTime).
721  arm_compute::ITensorInfo* outputLSTM;
722  arm_compute::ITensorInfo* inputLSTM;
723 
724  // If there is only one LSTM time major batch, we will not concat OR permute.
725  // Set input of LSTM to be first input ITensor.
726  // Set output of LSTM to be final output ITensor.
727  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
728  if (maxTime == 1 && descriptor.m_TimeMajor)
729  {
730  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
731  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
732 
733  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
734  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
735 
736  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
737  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
738 
739  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
740  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
741 
742  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
743  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
744  }
745  // If there is only one LSTM batch major batch, we will not concat, only permute.
746  // Set input of LSTM to be output of initial permute.
747  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
748  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
749  else if (maxTime == 1 && !descriptor.m_TimeMajor)
750  {
751  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
752  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
753  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
754  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
755  inputLSTM = &aclPermuteOutInfo;
756 
757  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
758  }
759  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
760  else
761  {
762  inputLSTM = splitterOutputsTensorInfosPtr[i];
763  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
764  }
765 
766  statusLSTM = arm_compute::NELSTMLayer::validate(inputLSTM,
767  &aclInputToForgetWeightsInfo,
768  &aclInputToCellWeightsInfo,
769  &aclInputToOutputWeightsInfo,
770  &aclRecurrentToForgetWeightsInfo,
771  &aclRecurrentToCellWeightsInfo,
772  &aclRecurrentToOutputWeightsInfo,
773  &aclForgetGateBiasInfo,
774  &aclCellBiasInfo,
775  &aclOutputGateBiasInfo,
776  &aclOutputStateInInfo,
777  &aclCellStateInInfo,
778  &aclScratchBufferInfo,
779  &aclOutputStateOutInfo,
780  &aclCellStateOutInfo,
781  outputLSTM,
782  lstm_params_info,
783  activationLayerInfo,
784  cell_threshold,
785  projection_threshold);
786 
787  if (statusLSTM.error_code() != arm_compute::ErrorCode::OK)
788  {
789  break;
790  }
791  }
792 
793  //
794  // Concat validate
795  //
796 
797  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
798  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
799  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
800  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
801 
802  TensorInfo concatOutputTensorInfo = TensorInfo(output);
803  concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
804  arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
805 
806  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
807  {
808  for (unsigned int i = 0; i < maxTime; ++i)
809  {
810  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
811  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
812  }
813 
814  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
815  if (!descriptor.m_TimeMajor)
816  {
817  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
818  &aclConcatOutputTensorInfo,
819  aclAxisConcat);
820  }
821  else
822  {
823  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
824  &aclOutputInfo,
825  aclAxisConcat);
826  }
827  }
828  // If only one LSTM batch, we do not concat and/or permute.
829  // Must ensure final output info is expanded to correct batch major dimensions.
830  else
831  {
832  if (!descriptor.m_TimeMajor)
833  {
834  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
835  BuildArmComputeTensorShape(shapeExpandBatchMajor));
836  }
837  else
838  {
839  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
840  BuildArmComputeTensorShape(shapeExpandTimeMajor));
841  }
842  }
843 
844  //
845  // Permute validate
846  //
847  if (!descriptor.m_TimeMajor)
848  {
849  // Output now time major. Permute output back to batch major.
850  if (maxTime != 1)
851  {
852  statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
853  &aclOutputInfo,
854  arm_compute::PermutationVector(0U, 2U, 1U));
855  }
856  else
857  {
858  statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
859  &aclOutputInfo,
860  arm_compute::PermutationVector(0U, 2U, 1U));
861  }
862  }
863 
864  auto okCode = arm_compute::ErrorCode::OK;
865  if (statusPermute1.error_code() == okCode &&
866  statusSplit.error_code() == okCode &&
867  statusLSTM.error_code() == okCode &&
868  statusConcat.error_code() == okCode &&
869  statusPermute2.error_code() == okCode)
870  {
871  return arm_compute::Status(arm_compute::ErrorCode::OK,
872  "All Unidirectional Sequence LSTM layer validate status OK.");
873  }
874  else
875  {
876  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
877  "Unidirectional Sequence LSTM layer validate status failed.");
878  }
879 }

◆ NeonUnidirectionalSequenceLstmWorkloadValidate()

arm_compute::Status NeonUnidirectionalSequenceLstmWorkloadValidate ( const TensorInfo &  input,
const TensorInfo &  outputStateIn,
const TensorInfo &  cellStateIn,
const TensorInfo &  outputStateOut,
const TensorInfo &  cellStateOut,
const TensorInfo &  output,
const UnidirectionalSequenceLstmDescriptor &  descriptor,
const LstmInputParamsInfo &  paramsInfo 
)

Definition at line 491 of file NeonUnidirectionalSequenceLstmWorkload.cpp.

References TensorInfo::GetShape(), and LstmDescriptor::m_TimeMajor.

Referenced by NeonLayerSupport::IsUnidirectionalSequenceLstmSupported().

499 {
500  TensorShape inputLayerShape = input.GetShape();
501  TensorShape outputLayerShape = output.GetShape();
502 
503  unsigned int maxTime = descriptor.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
504  unsigned int batchSize = descriptor.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
505  unsigned int inputSize = inputLayerShape[2];
506  unsigned int outputSize = outputLayerShape[2];
507 
508  const TensorShape timeMajorShapeInput({maxTime, batchSize, inputSize});
509  const TensorShape timeMajorShapeOutput({maxTime, batchSize, outputSize});
510 
511  arm_compute::Status statusPermute1 = arm_compute::Status(arm_compute::ErrorCode::OK,
512  "Permute1 status");
513  arm_compute::Status statusSplit = arm_compute::Status(arm_compute::ErrorCode::OK,
514  "Split status");
515  arm_compute::Status statusLSTM = arm_compute::Status(arm_compute::ErrorCode::OK,
516  "LSTM status");
517  arm_compute::Status statusConcat = arm_compute::Status(arm_compute::ErrorCode::OK,
518  "Concat status");
519  arm_compute::Status statusPermute2 = arm_compute::Status(arm_compute::ErrorCode::OK,
520  "Permute2 status");
521 
522  const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
523  const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
524 
525  //
526  // Permute validate
527  //
528  TensorInfo permuteOutInfo = TensorInfo(input);
529  arm_compute::TensorInfo aclPermuteOutInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permuteOutInfo);
530  if (!descriptor.m_TimeMajor)
531  {
532  statusPermute1 = arm_compute::NEPermute::validate(&aclInputInfo,
533  &aclPermuteOutInfo,
534  arm_compute::PermutationVector(0U, 2U, 1U));
535  }
536 
537  //
538  // Split and Concat Tensors validate
539  //
540  std::vector<arm_compute::TensorInfo> splitterOutputsTensorInfos;
541  std::vector<arm_compute::TensorInfo> concatInputsTensorInfos;
542  std::vector<arm_compute::ITensorInfo*> splitterOutputsTensorInfosPtr;
543  std::vector<const arm_compute::ITensorInfo*> concatInputsTensorInfosPtr;
544  splitterOutputsTensorInfos.reserve(maxTime);
545  concatInputsTensorInfos.reserve(maxTime);
546  for (unsigned int i = 0; i < maxTime; ++i)
547  {
548  arm_compute::TensorInfo splitter_out;
549  arm_compute::TensorInfo concat_in;
550 
551  auto splitterTensorInfo = TensorInfo(input);
552  auto concatTensorInfo = TensorInfo(output);
553  splitterTensorInfo.SetShape({batchSize, inputSize});
554  concatTensorInfo.SetShape({batchSize, outputSize});
555 
556  arm_compute::TensorInfo aclSplitterTensorInfo
557  = armcomputetensorutils::BuildArmComputeTensorInfo(splitterTensorInfo);
558  arm_compute::TensorInfo aclConcatTensorInfo
559  = armcomputetensorutils::BuildArmComputeTensorInfo(concatTensorInfo);
560 
561  splitterOutputsTensorInfos.emplace_back(aclSplitterTensorInfo);
562  concatInputsTensorInfos.emplace_back(aclConcatTensorInfo);
563  splitterOutputsTensorInfosPtr.emplace_back(&splitterOutputsTensorInfos[i]);
564  concatInputsTensorInfosPtr.emplace_back(&concatInputsTensorInfos[i]);
565  }
566 
567  //
568  // Split validate
569  //
570  unsigned int numberDimensions = 3;
571  unsigned int dimension = 0; // splitting on 0-dimension (i.e. maxTime dimension)
572  unsigned int aclAxisSplit = CalcAclAxis(numberDimensions, dimension);
573 
574  if (maxTime != 1) // ACL split does not work with only one element to split.
575  {
576  if (!descriptor.m_TimeMajor)
577  {
578  statusSplit = arm_compute::NESplit::validate(&aclPermuteOutInfo,
579  splitterOutputsTensorInfosPtr,
580  aclAxisSplit);
581  } else
582  {
583  statusSplit = arm_compute::NESplit::validate(&aclInputInfo, splitterOutputsTensorInfosPtr, aclAxisSplit);
584  }
585  }
586 
587  //
588  // LSTM validate
589  //
590 
591  arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
592 
593  const TensorInfo& scratchBuffer = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
594 
595  lstm_params_info.set_cell_clip_params(descriptor.m_ClippingThresCell);
596  lstm_params_info.set_projection_clip_params(descriptor.m_ClippingThresProj);
597  // The inputs and outputs
598  const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
599  const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
600  const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
601  const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
602  const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
603 
604  // Basic parameters
605  const arm_compute::TensorInfo aclInputToForgetWeightsInfo
606  = BuildArmComputeTensorInfo(paramsInfo.GetInputToForgetWeights());
607  const arm_compute::TensorInfo aclInputToCellWeightsInfo
608  = BuildArmComputeTensorInfo(paramsInfo.GetInputToCellWeights());
609  const arm_compute::TensorInfo aclInputToOutputWeightsInfo
610  = BuildArmComputeTensorInfo(paramsInfo.GetInputToOutputWeights());
611  const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
612  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToForgetWeights());
613  const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
614  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToCellWeights());
615  const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
616  = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToOutputWeights());
617  const arm_compute::TensorInfo aclForgetGateBiasInfo
618  = BuildArmComputeTensorInfo(paramsInfo.GetForgetGateBias());
619  const arm_compute::TensorInfo aclCellBiasInfo
620  = BuildArmComputeTensorInfo(paramsInfo.GetCellBias());
621  const arm_compute::TensorInfo aclOutputGateBiasInfo
622  = BuildArmComputeTensorInfo(paramsInfo.GetOutputGateBias());
623 
624  arm_compute::TensorInfo aclInputToInputWeightsInfo;
625  arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
626  arm_compute::TensorInfo aclCellToInputWeightsInfo;
627  arm_compute::TensorInfo aclInputGateBiasInfo;
628  arm_compute::TensorInfo aclProjectionWeightsInfo;
629  arm_compute::TensorInfo aclProjectionBiasInfo;
630  arm_compute::TensorInfo aclCellToForgetWeightsInfo;
631  arm_compute::TensorInfo aclCellToOutputWeightsInfo;
632 
633  arm_compute::TensorInfo aclInputLayerNormWeightsInfo;
634  arm_compute::TensorInfo aclForgetLayerNormWeightsInfo;
635  arm_compute::TensorInfo aclCellLayerNormWeightsInfo;
636  arm_compute::TensorInfo aclOutputLayerNormWeightsInfo;
637 
638  if (!descriptor.m_CifgEnabled)
639  {
640  if (descriptor.m_PeepholeEnabled)
641  {
642  aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToInputWeights());
643  }
644  aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputToInputWeights());
645  aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetRecurrentToInputWeights());
646  aclInputGateBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputGateBias());
647 
648  lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo,
649  &aclRecurrentToInputWeightsInfo,
650  descriptor.m_PeepholeEnabled ? &aclCellToInputWeightsInfo : nullptr,
651  &aclInputGateBiasInfo);
652  }
653 
654  if (descriptor.m_ProjectionEnabled)
655  {
656  if (paramsInfo.m_ProjectionBias != nullptr)
657  {
658  aclProjectionBiasInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionBias());
659  }
660  aclProjectionWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetProjectionWeights());
661 
662  lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
663  paramsInfo.m_ProjectionBias ? &aclProjectionBiasInfo : nullptr);
664  }
665 
666  if (descriptor.m_PeepholeEnabled)
667  {
668  aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToForgetWeights());
669  aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellToOutputWeights());
670 
671  lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
672  }
673 
674  if (descriptor.m_LayerNormEnabled)
675  {
676  if (!descriptor.m_CifgEnabled)
677  {
678  aclInputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetInputLayerNormWeights());
679  }
680  aclForgetLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetForgetLayerNormWeights());
681  aclCellLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetCellLayerNormWeights());
682  aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
683 
684  lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
685  &aclInputLayerNormWeightsInfo,
686  &aclForgetLayerNormWeightsInfo,
687  &aclCellLayerNormWeightsInfo,
688  &aclOutputLayerNormWeightsInfo);
689  }
690 
691  lstm_params_info.set_matmul_scale_params(descriptor.m_InputIntermediateScale,
692  descriptor.m_ForgetIntermediateScale,
693  descriptor.m_CellIntermediateScale,
694  descriptor.m_OutputIntermediateScale);
695 
696  lstm_params_info.set_hidden_state_params(descriptor.m_HiddenStateZeroPoint, descriptor.m_HiddenStateScale);
697 
698  for (unsigned int i = 0; i != maxTime; ++i)
699  {
700 
701  // Set LSTM input and output ITensors depending on:
702  // input format (timeMajor) & number of LSTM batches (maxTime).
703  arm_compute::ITensorInfo* outputLSTM;
704  arm_compute::ITensorInfo* inputLSTM;
705 
706  // If there is only one LSTM time major batch, we will not concat OR permute.
707  // Set input of LSTM to be first input ITensor.
708  // Set output of LSTM to be final output ITensor.
709  // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
710  if (maxTime == 1 && descriptor.m_TimeMajor)
711  {
712  TensorShape inputShape = GetTensorShape(aclInputInfo.tensor_shape(), 1U);
713  TensorShape outputShape = GetTensorShape(aclOutputInfo.tensor_shape(), 1U);
714 
715  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
716  TensorShape outputShapeShrink({outputShape[1], outputShape[2]});
717 
718  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
719  auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
720 
721  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(acl_input_shape_shrink);
722  inputLSTM = const_cast<arm_compute::TensorInfo*>(&aclInputInfo);
723 
724  const_cast<arm_compute::TensorInfo*>(&aclOutputInfo)->set_tensor_shape(acl_output_shape_shrink);
725  outputLSTM = const_cast<arm_compute::TensorInfo*>(&aclOutputInfo);
726  }
727  // If there is only one LSTM batch major batch, we will not concat, only permute.
728  // Set input of LSTM to be output of initial permute.
729  // Set output of LSTM to be first element of m_ConcatInputs & use that value later in permute.
730  // LSTM output cannot be > 2 dimensions so need to resize its TensorInfo.
731  else if (maxTime == 1 && !descriptor.m_TimeMajor)
732  {
733  TensorShape inputShape = GetTensorShape(aclPermuteOutInfo.tensor_shape(), 1U);
734  TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
735  auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
736  aclPermuteOutInfo.set_tensor_shape(acl_input_shape_shrink);
737  inputLSTM = &aclPermuteOutInfo;
738 
739  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
740  }
741  // Batch major AND/OR 2+ LSTM batches so will use concat AND/OR permute later on.
742  else
743  {
744  inputLSTM = splitterOutputsTensorInfosPtr[i];
745  outputLSTM = const_cast<arm_compute::ITensorInfo*>(concatInputsTensorInfosPtr[i]);
746  }
747 
748  statusLSTM = arm_compute::NEQLSTMLayer::validate(inputLSTM,
749  &aclInputToForgetWeightsInfo,
750  &aclInputToCellWeightsInfo,
751  &aclInputToOutputWeightsInfo,
752  &aclRecurrentToForgetWeightsInfo,
753  &aclRecurrentToCellWeightsInfo,
754  &aclRecurrentToOutputWeightsInfo,
755  &aclForgetGateBiasInfo,
756  &aclCellBiasInfo,
757  &aclOutputGateBiasInfo,
758  &aclCellStateInInfo,
759  &aclOutputStateInInfo,
760  &aclCellStateOutInfo,
761  &aclOutputStateOutInfo,
762  outputLSTM,
763  lstm_params_info);
764  }
765 
766  //
767  // Concat validate
768  //
769 
770  // Expand dimensions of LSTM outputs adding one empty dimension to fit concatenate inputs.
771  TensorShape shape = GetTensorShape(concatInputsTensorInfosPtr[0]->tensor_shape(), 1U);
772  TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
773  TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
774 
775  TensorInfo concatOutputTensorInfo = TensorInfo(output);
776  concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
777  arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
778 
779  if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
780  {
781  for (unsigned int i = 0; i < maxTime; ++i)
782  {
783  auto acl_shape_expand = BuildArmComputeTensorShape(shapeExpandTimeMajor);
784  concatInputsTensorInfos[i].set_tensor_shape(acl_shape_expand);
785  }
786 
787  unsigned int aclAxisConcat = CalcAclAxis(numberDimensions, dimension);
788  if (!descriptor.m_TimeMajor)
789  {
790  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
791  &aclConcatOutputTensorInfo,
792  aclAxisConcat);
793  }
794  else
795  {
796  statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
797  &aclOutputInfo,
798  aclAxisConcat);
799  }
800  }
801  // If only one LSTM batch, we do not concat and/or permute.
802  // Must ensure final output info is expanded to correct batch major dimensions.
803  else
804  {
805  if (!descriptor.m_TimeMajor)
806  {
807  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
808  BuildArmComputeTensorShape(shapeExpandBatchMajor));
809  }
810  else
811  {
812  const_cast<arm_compute::TensorInfo*>(&aclInputInfo)->set_tensor_shape(
813  BuildArmComputeTensorShape(shapeExpandTimeMajor));
814  }
815  }
816 
817  //
818  // Permute validate
819  //
820  if (!descriptor.m_TimeMajor)
821  {
822  // Output now time major. Permute output back to batch major.
823  if (maxTime != 1)
824  {
825  statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
826  &aclOutputInfo,
827  arm_compute::PermutationVector(0U, 2U, 1U));
828  }
829  else
830  {
831  statusPermute2 = arm_compute::NEPermute::validate(concatInputsTensorInfosPtr[0],
832  &aclOutputInfo,
833  arm_compute::PermutationVector(0U, 2U, 1U));
834  }
835  }
836 
837  auto okCode = arm_compute::ErrorCode::OK;
838  if (statusPermute1.error_code() == okCode &&
839  statusSplit.error_code() == okCode &&
840  statusLSTM .error_code() == okCode &&
841  statusConcat.error_code() == okCode &&
842  statusPermute2.error_code() == okCode)
843  {
844  return arm_compute::Status(arm_compute::ErrorCode::OK,
845  "All Unidirectional Sequence LSTM layer validate status OK.");
846  }
847  else
848  {
849  return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
850  "Unidirectional Sequence LSTM layer validate status failed.");
851  }
852 }
Status
enumeration
Definition: Types.hpp:42
armnn::TensorShape GetTensorShape(unsigned int numberOfBatches, unsigned int numberOfChannels, unsigned int height, unsigned int width, const armnn::DataLayout dataLayout)
Definition: TensorUtils.cpp:19
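As lines 503-504 above show, the validate function derives maxTime and batchSize from the 3D input shape according to m_TimeMajor. The following sketch is illustrative only (it is not part of the ArmNN sources) and shows how a caller might build an input TensorInfo for either layout; the QAsymmS8 data type is an assumption for this quantized Neon workload.

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

// Illustrative helper: build the 3D sequence-input TensorInfo this validate
// function expects. Time major puts the time axis first, batch major second.
armnn::TensorInfo MakeSequenceInputInfo(unsigned int maxTime,
                                        unsigned int batchSize,
                                        unsigned int inputSize,
                                        bool timeMajor)
{
    armnn::TensorShape shape = timeMajor
        ? armnn::TensorShape({maxTime, batchSize, inputSize})   // [maxTime, batchSize, inputSize]
        : armnn::TensorShape({batchSize, maxTime, inputSize});  // [batchSize, maxTime, inputSize]
    return armnn::TensorInfo(shape, armnn::DataType::QAsymmS8); // data type assumed for illustration
}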

◆ NextIndex()

bool armnn::NextIndex ( const unsigned int  numDims,
const armnn::TensorShape &  dims,
std::vector< unsigned int > &  current 
)

Definition at line 19 of file Reduce.cpp.

Referenced by Reduce().

20 {
21  unsigned int carry = 1;
22 
23  for (unsigned int idx = numDims; idx-- > 0; )
24  {
25  unsigned int current_val = current[idx] + carry;
26  if (dims[idx] == current_val)
27  {
28  current[idx] = 0;
29  }
30  else
31  {
32  current[idx] = current_val;
33  carry = 0;
34  break;
35  }
36  }
37  return (carry == 0);
38 }
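NextIndex() advances a multi-dimensional index like an odometer (last dimension fastest) and returns false once every position has been visited. A minimal usage sketch, assuming a 2x3 shape; this mirrors how Reduce() drives the helper but is not copied from the sources:

armnn::TensorShape dims({2, 3});
std::vector<unsigned int> current(2, 0);   // start at coordinates {0, 0}
do
{
    // visit the element at (current[0], current[1]) here
} while (armnn::NextIndex(2, dims, current));   // returns false once {1, 2} has been consumed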

◆ NonMaxSuppression()

std::vector< unsigned int > NonMaxSuppression ( unsigned int  numBoxes,
const std::vector< float > &  boxCorners,
const std::vector< float > &  scores,
float  nmsScoreThreshold,
unsigned int  maxDetection,
float  nmsIouThreshold 
)

Definition at line 49 of file DetectionPostProcess.cpp.

References GenerateRangeK(), IntersectionOverUnion(), numeric_cast(), and TopKSort().

Referenced by DetectionPostProcess(), and TEST_SUITE().

55 {
56  // Select boxes that have scores above a given threshold.
57  std::vector<float> scoresAboveThreshold;
58  std::vector<unsigned int> indicesAboveThreshold;
59  for (unsigned int i = 0; i < numBoxes; ++i)
60  {
61  if (scores[i] >= nmsScoreThreshold)
62  {
63  scoresAboveThreshold.push_back(scores[i]);
64  indicesAboveThreshold.push_back(i);
65  }
66  }
67 
68  // Sort the indices based on scores.
69  unsigned int numAboveThreshold = armnn::numeric_cast<unsigned int>(scoresAboveThreshold.size());
70  std::vector<unsigned int> sortedIndices = GenerateRangeK(numAboveThreshold);
71  TopKSort(numAboveThreshold, sortedIndices.data(), scoresAboveThreshold.data(), numAboveThreshold);
72 
73  // Number of output cannot be more than max detections specified in the option.
74  unsigned int numOutput = std::min(maxDetection, numAboveThreshold);
75  std::vector<unsigned int> outputIndices;
76  std::vector<bool> visited(numAboveThreshold, false);
77 
78  // Prune out the boxes with high intersection over union by keeping the box with higher score.
79  for (unsigned int i = 0; i < numAboveThreshold; ++i)
80  {
81  if (outputIndices.size() >= numOutput)
82  {
83  break;
84  }
85  if (!visited[sortedIndices[i]])
86  {
87  outputIndices.push_back(indicesAboveThreshold[sortedIndices[i]]);
88  for (unsigned int j = i + 1; j < numAboveThreshold; ++j)
89  {
90  unsigned int iIndex = indicesAboveThreshold[sortedIndices[i]] * 4;
91  unsigned int jIndex = indicesAboveThreshold[sortedIndices[j]] * 4;
92  if (IntersectionOverUnion(&boxCorners[iIndex], &boxCorners[jIndex]) > nmsIouThreshold)
93  {
94  visited[sortedIndices[j]] = true;
95  }
96  }
97  }
98  }
99  return outputIndices;
100 }
float IntersectionOverUnion(const float *boxI, const float *boxJ)
std::vector< unsigned int > GenerateRangeK(unsigned int k)
void TopKSort(unsigned int k, unsigned int *indices, const float *values, unsigned int numElement)
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
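A small usage sketch (not taken from the sources): three candidate boxes, two of which overlap heavily. Box corners are packed as four floats per box; the corner ordering assumed here is [yMin, xMin, yMax, xMax], matching how DetectionPostProcess lays out decoded boxes.

std::vector<float> boxCorners = { 0.0f, 0.0f, 1.0f, 1.0f,    // box 0
                                  0.0f, 0.0f, 1.0f, 0.9f,    // box 1, ~90% overlap with box 0
                                  0.0f, 2.0f, 1.0f, 3.0f };  // box 2, disjoint
std::vector<float> scores = { 0.9f, 0.8f, 0.6f };

// Keep at most 10 boxes with score >= 0.5, suppressing overlaps with IoU > 0.5.
std::vector<unsigned int> kept =
    armnn::NonMaxSuppression(3, boxCorners, scores, 0.5f, 10, 0.5f);
// Expected result: kept == { 0, 2 } (box 1 is suppressed by the higher-scoring box 0).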

◆ numeric_cast() [1/9]

std::enable_if_t< std::is_unsigned<Source>::value && std::is_unsigned<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 35 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

Referenced by AllocateOutputData(), ArgMinMax(), armnnTfLiteParser::AsFloatArray(), CheckInferenceTimeThreshold(), ClArgMinMaxWorkload::ClArgMinMaxWorkload(), ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(), ClStridedSliceWorkload::ClStridedSliceWorkload(), ComputeReductionTensorShape(), armnnTfLiteParser::ComputeWrappedIndex(), OutputSlot::Connect(), CreateNetworkImpl< IParser >::Create(), OnnxParserImpl::CreateNetworkFromString(), DepthwiseConvolution2dAsymmetricTestImpl(), DepthwiseConvolution2dTestImpl(), DetectionPostProcess(), RefL2NormalizationWorkload::ExecuteAsync(), armnnUtils::ExpandDims(), FakeQuantization(), Gather(), MockCounterDirectory::GetCategoryCount(), MockCounterDirectory::GetCounterCount(), MockCounterDirectory::GetCounterSetCount(), MockCounterDirectory::GetDeviceCount(), IDeserializer::DeserializerImpl::GetNetworkOutputBindingInfo(), OutputSlot::GetNumConnections(), SubgraphView::GetNumInputSlots(), SubgraphView::GetNumOutputSlots(), StridedSliceDescriptor::GetStartForAxis(), StridedSliceDescriptor::GetStopForAxis(), GetStreamMetaDataPacketSize(), Cifar10Database::GetTestCaseData(), YoloDatabase::GetTestCaseData(), armnnUtils::GetUnsignedAxis(), RequestCountersPacketHandler::HandlePacket(), InferenceTestImage::InferenceTestImage(), PreluLayer::InferOutputShapes(), RefLayerSupport::IsMeanSupported(), LogSoftmax(), main(), LoadedNetwork::MakeLoadedNetwork(), NeonArgMinMaxWorkload::NeonArgMinMaxWorkload(), NeonSpaceToBatchNdWorkload::NeonSpaceToBatchNdWorkload(), NeonStridedSliceWorkload::NeonStridedSliceWorkload(), NonMaxSuppression(), ClassifierTestCaseProvider< TDatabase, InferenceModel >::OnInferenceTestFinished(), IDeserializer::DeserializerImpl::OutputShapeOfReshape(), TfLiteParserImpl::OutputShapeOfReshape(), ParseArray(), ParseDataArray< armnn::DataType::QAsymmS8 >(), ParseDataArray< armnn::DataType::QAsymmU8 >(), ParseDataArray< armnn::DataType::QSymmS8 >(), Pooling2d(), Pooling3d(), ClassifierTestCase< TTestCaseDatabase, TModel >::ProcessResult(), Reduce(), InferenceModel< IParser, TDataType >::Run(), InferenceModel< IParser, TDataType >::RunAsync(), ClContextSerializer::SaveSerializedToStream(), ISerializer::SerializerImpl::SaveSerializedToStream(), SimpleConvolution2dNhwcTestImpl(), SimpleConvolution2dTestImpl(), SimpleConvolution3dTestImpl(), InferenceTestImage::StbResize(), StridedSlice(), Graph::SubstituteSubgraph(), TEST_SUITE(), MeanQueueDescriptor::Validate(), ReduceLayer::ValidateTensorShapesFromInputs(), MeanLayer::ValidateTensorShapesFromInputs(), VerifyTimelineLabelBinaryPacketData(), and WorkingMemHandle::WorkingMemHandle().

36 {
37 #if ENABLE_NUMERIC_CAST_CHECKS
38  if (source > std::numeric_limits<Dest>::max())
39  {
40  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to "
41  "narrower unsigned type. Overflow detected.");
42  }
43 #endif // ENABLE_NUMERIC_CAST_CHECKS
44 
45  return static_cast<Dest>(source);
46 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25
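A short usage sketch: numeric_cast narrows between arithmetic types and, when ENABLE_NUMERIC_CAST_CHECKS is defined, asserts instead of silently wrapping. Values are illustrative only.

#include <armnn/utility/NumericCast.hpp>
#include <cstddef>
#include <cstdint>

size_t  elementCount = 200;
uint8_t fits = armnn::numeric_cast<uint8_t>(elementCount);   // OK: 200 fits in uint8_t

size_t  tooBig = 300;
// armnn::numeric_cast<uint8_t>(tooBig);  // triggers ARMNN_NUMERIC_CAST_CHECK when checks are
//                                        // enabled, otherwise behaves like a plain static_cast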

◆ numeric_cast() [2/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 58 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

59 {
60 #if ENABLE_NUMERIC_CAST_CHECKS
61  if (source > std::numeric_limits<Dest>::max())
62  {
63  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
64  "Overflow detected.");
65  }
66 
67  if (source < std::numeric_limits<Dest>::lowest())
68  {
69  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed integral type to narrower signed type. "
70  "Underflow detected.");
71  }
72 #endif // ENABLE_NUMERIC_CAST_CHECKS
73 
74  return static_cast<Dest>(source);
75 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [3/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 83 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

84 {
85 #if ENABLE_NUMERIC_CAST_CHECKS
86  if (source > std::numeric_limits<Dest>::max())
87  {
88  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
89  "Overflow detected.");
90  }
91 
92  if (source < std::numeric_limits<Dest>::lowest())
93  {
94  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
95  "Underflow detected.");
96  }
97 #endif // ENABLE_NUMERIC_CAST_CHECKS
98 
99  return static_cast<Dest>(source);
100 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [4/9]

std::enable_if_t< std::is_floating_point<Source>::value && std::is_signed<Dest>::value && std::is_integral<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 109 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

110 {
111 #if ENABLE_NUMERIC_CAST_CHECKS
112  if (source > static_cast<Source>(std::numeric_limits<Dest>::max()))
113  {
114  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
115  "Overflow detected.");
116  }
117 
118  if (source < static_cast<Source>(std::numeric_limits<Dest>::lowest()))
119  {
120  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to narrower signed type. "
121  "Underflow detected.");
122  }
123 #endif // ENABLE_NUMERIC_CAST_CHECKS
124 
125  return static_cast<Dest>(source);
126 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [5/9]

std::enable_if_t< std::is_signed<Source>::value && std::is_integral<Source>::value && std::is_floating_point<Dest>::value, Dest> armnn::numeric_cast ( Source  source)

Definition at line 135 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

136 {
137 #if ENABLE_NUMERIC_CAST_CHECKS
138  Dest sourceConverted = static_cast<Dest>(source);
139 
140  if (sourceConverted > std::numeric_limits<Dest>::max())
141  {
142  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
143  "Overflow detected.");
144  }
145 
146  if (sourceConverted < std::numeric_limits<Dest>::lowest())
147  {
148  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to narrower floating point type. "
149  "Underflow detected.");
150  }
151 #endif // ENABLE_NUMERIC_CAST_CHECKS
152 
153  return static_cast<Dest>(source);
154 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [6/9]

std::enable_if_t< std::is_signed<Dest>::value && std::is_integral<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 165 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

166 {
167 #if ENABLE_NUMERIC_CAST_CHECKS
168  if (sValue > static_cast< typename std::make_unsigned<Dest>::type >(std::numeric_limits<Dest>::max()))
169  {
170  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to signed type. "
171  "Overflow detected.");
172  }
173 #endif // ENABLE_NUMERIC_CAST_CHECKS
174 
175  return static_cast<Dest>(sValue);
176 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [7/9]

std::enable_if_t< std::is_floating_point<Dest>::value && std::is_unsigned<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 184 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

185 {
186 #if ENABLE_NUMERIC_CAST_CHECKS
187  if (static_cast<Dest>(sValue) > std::numeric_limits<Dest>::max())
188  {
189  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting unsigned type to floating point type. "
190  "Overflow detected.");
191  }
192 #endif // ENABLE_NUMERIC_CAST_CHECKS
193 
194  return static_cast<Dest>(sValue);
195 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [8/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_signed<Source>::value && std::is_integral<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 206 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

207 {
208 #if ENABLE_NUMERIC_CAST_CHECKS
209  if (sValue < 0)
210  {
211  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
212  "Underflow detected.");
213  }
214 
215  if (static_cast< typename std::make_unsigned<Source>::type >(sValue) > std::numeric_limits<Dest>::max())
216  {
217  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting signed type to unsigned type. "
218  "Overflow detected.");
219  }
220 #endif // ENABLE_NUMERIC_CAST_CHECKS
221  return static_cast<Dest>(sValue);
222 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ numeric_cast() [9/9]

std::enable_if_t< std::is_unsigned<Dest>::value && std::is_floating_point<Source>::value, Dest> armnn::numeric_cast ( Source  sValue)

Definition at line 230 of file NumericCast.hpp.

References ARMNN_NUMERIC_CAST_CHECK.

231 {
232 #if ENABLE_NUMERIC_CAST_CHECKS
233  if (sValue < 0)
234  {
235  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting negative value to unsigned type. "
236  "Underflow detected.");
237  }
238 
239  if (sValue > static_cast<Source>(std::numeric_limits<Dest>::max()))
240  {
241  ARMNN_NUMERIC_CAST_CHECK(false, "numeric_cast failed casting floating point type to unsigned type. "
242  "Overflow detected.");
243  }
244 #endif // ENABLE_NUMERIC_CAST_CHECKS
245  return static_cast<Dest>(sValue);
246 }
#define ARMNN_NUMERIC_CAST_CHECK(cond, msg)
Definition: NumericCast.hpp:25

◆ Offset()

unsigned int armnn::Offset ( const TensorShape &  shape,
unsigned int  batch,
unsigned int  height,
unsigned int  width,
unsigned int  channels,
const DataLayoutIndexed &  dataLayout 
)
inline

Definition at line 19 of file BatchToSpaceNd.cpp.

References DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetWidthIndex(), and NHWC.

Referenced by BatchToSpaceNd().

21 {
22  if (dataLayout.GetDataLayout() == DataLayout::NHWC)
23  {
24  return ((batch * shape[dataLayout.GetHeightIndex()] + height) * shape[dataLayout.GetWidthIndex()] + width) *
25  shape[dataLayout.GetChannelsIndex()] + channels;
26  }
27  else
28  {
29  return ((batch * shape[dataLayout.GetChannelsIndex()] + channels) *
30  shape[dataLayout.GetHeightIndex()] + height) *
31  shape[dataLayout.GetWidthIndex()] + width;
32  }
33 }
unsigned int GetWidthIndex() const
unsigned int GetHeightIndex() const
armnn::DataLayout GetDataLayout() const
unsigned int GetChannelsIndex() const
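Offset() is an internal helper of the reference BatchToSpaceNd implementation, but the arithmetic is easy to check by hand. A sketch of the NHWC case, with values chosen purely for illustration:

armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
armnn::TensorShape shape({2, 4, 5, 8});                       // [N, H, W, C]
unsigned int flatIndex = armnn::Offset(shape, /*batch*/ 1, /*height*/ 2, /*width*/ 0, /*channels*/ 3, nhwc);
// NHWC: ((1 * 4 + 2) * 5 + 0) * 8 + 3 = 243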

◆ operator<<() [1/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const std::vector< Compute > &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 47 of file BackendId.hpp.

References GetComputeDeviceAsCString().

48 {
49  for (const Compute& comp : compute)
50  {
51  os << GetComputeDeviceAsCString(comp) << " ";
52  }
53  return os;
54 }
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
Definition: BackendId.hpp:21
constexpr char const * GetComputeDeviceAsCString(Compute compute)
Deprecated function that will be removed together with the Compute enum.
Definition: BackendId.hpp:34

◆ operator<<() [2/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const std::set< Compute > &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 58 of file BackendId.hpp.

References GetComputeDeviceAsCString().

59 {
60  for (const Compute& comp : compute)
61  {
62  os << GetComputeDeviceAsCString(comp) << " ";
63  }
64  return os;
65 }
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
Definition: BackendId.hpp:21
constexpr char const * GetComputeDeviceAsCString(Compute compute)
Deprecated function that will be removed together with the Compute enum.
Definition: BackendId.hpp:34

◆ operator<<() [3/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BackendVersion &  backendVersion 
)
inline

Definition at line 68 of file IBackendInternal.hpp.

References BackendVersion::m_Major, and BackendVersion::m_Minor.

69 {
70  os << "[" << backendVersion.m_Major << "." << backendVersion.m_Minor << "]";
71 
72  return os;
73 }

◆ operator<<() [4/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const Compute &  compute 
)
inline

Deprecated function that will be removed together with the Compute enum.

Definition at line 69 of file BackendId.hpp.

References GetComputeDeviceAsCString().

70 {
71  os << GetComputeDeviceAsCString(compute);
72  return os;
73 }
constexpr char const * GetComputeDeviceAsCString(Compute compute)
Deprecated function that will be removed together with the Compute enum.
Definition: BackendId.hpp:34

◆ operator<<() [5/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BFloat16 &  b 
)
inline

Definition at line 122 of file BFloat16.hpp.

References BFloat16::ToFloat32(), and BFloat16::Val().

123 {
124  os << b.ToFloat32() << "(0x" << std::hex << b.Val() << ")";
125  return os;
126 }

◆ operator<<() [6/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const BackendId &  id 
)
inline

Definition at line 176 of file BackendId.hpp.

177 {
178  os << id.Get();
179  return os;
180 }

◆ operator<<() [7/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const TContainer< BackendId, TContainerTemplateArgs... > &  ids 
)

Definition at line 183 of file BackendId.hpp.

185 {
186  os << '[';
187  for (const auto& id : ids) { os << id << " "; }
188  os << ']';
189  return os;
190 }

◆ operator<<() [8/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
Status  stat 
)
inline

Definition at line 297 of file TypesUtils.hpp.

References GetStatusAsCString().

298 {
299  os << GetStatusAsCString(stat);
300  return os;
301 }
constexpr char const * GetStatusAsCString(Status status)
Definition: TypesUtils.hpp:17

◆ operator<<() [9/9]

std::ostream& armnn::operator<< ( std::ostream &  os,
const armnn::TensorShape &  shape 
)
inline

Definition at line 304 of file TypesUtils.hpp.

References Dequantize, TensorShape::GetNumDimensions(), and Quantize.

305 {
306  os << "[";
307  for (uint32_t i=0; i<shape.GetNumDimensions(); ++i)
308  {
309  if (i!=0)
310  {
311  os << ",";
312  }
313  os << shape[i];
314  }
315  os << "]";
316  return os;
317 }
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
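Usage sketch: streaming a shape produces a bracketed, comma-separated list, which is convenient for log messages.

#include <armnn/Tensor.hpp>
#include <iostream>

armnn::TensorShape shape({1, 224, 224, 3});
std::cout << shape << std::endl;   // prints: [1,224,224,3]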

◆ operator>>() [1/2]

std::istream& armnn::operator>> ( std::istream &  in,
armnn::Compute &  compute 
)
inline

Definition at line 23 of file InferenceTest.hpp.

References ParseComputeDevice(), and Undefined.

24 {
25  std::string token;
26  in >> token;
27  compute = armnn::ParseComputeDevice(token.c_str());
28  if (compute == armnn::Compute::Undefined)
29  {
30  in.setstate(std::ios_base::failbit);
31  throw cxxopts::OptionException(fmt::format("Unrecognised compute device: {}", token));
32  }
33  return in;
34 }
constexpr armnn::Compute ParseComputeDevice(const char *str)
Deprecated function that will be removed together with the Compute enum.
Definition: TypesUtils.hpp:182

◆ operator>>() [2/2]

std::istream& armnn::operator>> ( std::istream &  in,
armnn::BackendId &  backend 
)
inline

Definition at line 36 of file InferenceTest.hpp.

References ParseComputeDevice(), and Undefined.

37 {
38  std::string token;
39  in >> token;
40  armnn::Compute compute = armnn::ParseComputeDevice(token.c_str());
41  if (compute == armnn::Compute::Undefined)
42  {
43  in.setstate(std::ios_base::failbit);
44  throw cxxopts::OptionException(fmt::format("Unrecognised compute device: {}", token));
45  }
46  backend = compute;
47  return in;
48 }
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
Definition: BackendId.hpp:21
constexpr armnn::Compute ParseComputeDevice(const char *str)
Deprecated function that will be removed together with the Compute enum.
Definition: TypesUtils.hpp:182

◆ Optimize() [1/2]

IOptimizedNetworkPtr Optimize ( const INetwork &  network,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptions &  options = OptimizerOptions(),
Optional< std::vector< std::string > &>  messages = EmptyOptional() 
)

Create an optimized version of the network.

Parameters
network: INetwork description of the network to be optimized.
backendPreferences: The choice of the backend ordered by user preferences.
deviceSpec: DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
messages: If there are failures or warnings a string describing same will be added to the vector
options: OptimizerOptions object with optimizer configuration options
Returns
An IOptimizedNetworkPtr interface to the optimized network, throws an exception derived from armnn::Exception if process fails.
Examples:
AsyncExecutionSample.cpp, CustomMemoryAllocatorSample.cpp, DynamicSample.cpp, and SimpleSample.cpp.

Definition at line 1847 of file Network.cpp.

References BackendOptions::Var::AsBool(), IOptimizedNetwork::Optimize, ParseOptions(), and INetwork::pNetworkImpl.

Referenced by armnn::experimental::AsyncEndToEndTestImpl(), armnn::experimental::AsyncThreadedEndToEndTestImpl(), GetSoftmaxProfilerJson(), InferenceModel< IParser, TDataType >::InferenceModel(), ParserFlatbuffersFixture::loadNetwork(), main(), QLstmEndToEnd(), QuantizedLstmEndToEnd(), ParserPrototxtFixture< TParser >::Setup(), ParserFlatbuffersSerializeFixture::Setup(), ParserPrototxtFixture< TParser >::SetupOptimizedNetwork(), TEST_CASE_FIXTURE(), TEST_SUITE(), VerifyPostOptimisationStructureTestImpl(), and IMemoryOptimizerStrategy::~IMemoryOptimizerStrategy().

1852 {
1853  return Optimize(inNetwork.pNetworkImpl->GetGraph(),
1854  backendPreferences,
1855  deviceSpec,
1856  options,
1857  messages);
1858 }
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1847
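A condensed usage sketch of the typical call site, along the lines of the samples listed above (SimpleSample.cpp and friends); error handling and layer construction are omitted, and the backend list is only an example.

#include <armnn/ArmNN.hpp>

using namespace armnn;

IRuntime::CreationOptions runtimeOptions;
IRuntimePtr runtime = IRuntime::Create(runtimeOptions);

INetworkPtr network = INetwork::Create();
// ... add input, hidden and output layers to 'network' here ...

std::vector<BackendId> backends = { Compute::CpuAcc, Compute::CpuRef };   // preferred order
std::vector<std::string> messages;                                        // collects warnings/errors
IOptimizedNetworkPtr optNet = Optimize(*network,
                                       backends,
                                       runtime->GetDeviceSpec(),
                                       OptimizerOptions(),
                                       Optional<std::vector<std::string>&>(messages));

NetworkId networkId;
runtime->LoadNetwork(networkId, std::move(optNet));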

◆ Optimize() [2/2]

IOptimizedNetworkPtr Optimize ( const Graph &  inGraph,
const std::vector< BackendId > &  backendPreferences,
const IDeviceSpec &  deviceSpec,
const OptimizerOptions &  options,
Optional< std::vector< std::string > &>  messages = EmptyOptional() 
)

Create an optimized version of the network.

Parameters
inGraph: Graph to be optimized.
backendPreferences: The choice of the backend ordered by user preferences.
deviceSpec: DeviceSpec object as queried from the runtime. See IRuntime::GetDeviceSpec()
messages: If there are failures or warnings a string describing same will be added to the vector
options: OptimizerOptions object with optimizer configuration options
Returns
An IOptimizedNetworkPtr interface to the optimized network, throws an exception derived from armnn::Exception if process fails.

Definition at line 1670 of file Network.cpp.

References Graph::AddCompatibilityLayers(), ApplyBackendOptimizations(), ARMNN_LOG, ARMNN_SCOPED_PROFILING_EVENT, AssignBackends(), Graph::begin(), CreateSupportedBackends(), debug, IOptimizedNetwork::Destroy(), Graph::end(), BackendSettings::GetAvailablePreferredBackends(), ProfilerManager::GetInstance(), Graph::GetProfiler(), InferAndValidate, Graph::InferTensorInfos(), IOptimizedNetwork::IOptimizedNetwork(), OptimizerOptions::m_Debug, OptimizationResult::m_Error, OptimizerOptions::m_ImportEnabled, OptimizerOptions::m_ModelOptions, OptimizerOptions::m_ProfilingEnabled, OptimizerOptions::m_ReduceFp32ToBf16, OptimizerOptions::m_ReduceFp32ToFp16, OptimizerOptions::m_shapeInferenceMethod, BackendSettings::m_SupportedBackends, MakeOptimizations(), Optimizer::Pass(), IOptimizedNetwork::pOptimizedNetworkImpl, ProfilerManager::RegisterProfiler(), ReportError(), SelectTensorHandleStrategy(), OptimizerOptions::ToString(), Undefined, ValidateOnly, and Graph::VerifyConstantLayerSetTensorInfo().

1675 {
1676  ARMNN_LOG(debug) << options.ToString();
1677 
1678  // Enable profiling
1679  auto profiler = inGraph.GetProfiler();
1680  ProfilerManager::GetInstance().RegisterProfiler(profiler.get());
1681  profiler->EnableProfiling(options.m_ProfilingEnabled);
1682 
1683  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer");
1684  if (backendPreferences.empty())
1685  {
1686  throw InvalidArgumentException("Invoked Optimize with no backends specified");
1687  }
1688 
1689  if (options.m_ReduceFp32ToFp16 && options.m_ReduceFp32ToBf16)
1690  {
1691  throw InvalidArgumentException("BFloat16 and Float16 optimization cannot be enabled at the same time.");
1692  }
1693 
1694  // Ensure TensorInfo is set on all output slots of ConstantLayers in the graph
1695  inGraph.VerifyConstantLayerSetTensorInfo();
1696 
1697  std::unique_ptr<Graph> graph = std::make_unique<Graph>(inGraph);
1698 
1699  auto optNet = IOptimizedNetworkPtr(new IOptimizedNetwork(std::move(graph), options.m_ModelOptions),
1700  &IOptimizedNetwork::Destroy);
1701 
1702  IOptimizedNetwork* optNetObjPtr = optNet.get();
1703 
1704  // Get the optimized graph
1705  Graph& optGraph = optNetObjPtr->pOptimizedNetworkImpl->GetGraph();
1706 
1707  if(options.m_shapeInferenceMethod == ShapeInferenceMethod::InferAndValidate)
1708  {
1709  // Infer the tensor infos for all output slots. Throws an exception on failure
1710  optGraph.InferTensorInfos();
1711  }
1712 
1713  // Perform AddBroadcastReshapeLayer optimisation
1714  using namespace optimizations;
1715  Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));
1716 
1717  if(options.m_shapeInferenceMethod == ShapeInferenceMethod::ValidateOnly)
1718  {
1719  // Validate the tensor infos for all output slots. Throws an exception on failure
1720  optGraph.InferTensorInfos();
1721  }
1722 
1723  // Need to FusePermuteIntoConstantLayer before FoldPadIntoDepthwiseConvolution2d or
1724  // FuseBatchNormIntoDepthwiseConvolution2D optimizations are called.
1725  Optimizer::Pass(optGraph, MakeOptimizations(FusePermuteIntoConstLayer()));
1726 
1727  // Perform optimisation passes
1728  Optimizer::Pass(optGraph, MakeOptimizations(SquashEqualPermuteSiblings(),
1733  MovePermuteUp(),
1734  MoveTransposeUp(),
1735  PermuteAsReshape(),
1748 
1749  // If Fp32 to Fp16 optimization is set convert Fp32 network to Fp16
1750  if (options.m_ReduceFp32ToFp16)
1751  {
1752  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
1753  Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
1754  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
1755  }
1756 
1757  // If Fp32 to Bf16 optimization is set convert Fp32 network to Bf16
1758  // Convert input of Convolution2d and FullyConnected from Fp32 to Bf16
1759  // Only Constant weight of Convolution2d and FullyConnected are converted from Fp32 to Bf16
1760  if (options.m_ReduceFp32ToBf16)
1761  {
1762  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToBf16");
1763  Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToBf16Converter()));
1764  }
1765 
1766  // Initialize backend settings
1767  BackendSettings backendSettings(backendPreferences, deviceSpec);
1768  if (backendSettings.GetAvailablePreferredBackends().empty())
1769  {
1770  std::stringstream failureMsg;
1771  failureMsg << "None of the preferred backends " << backendPreferences
1772  << " are supported. Current platform provides " << backendSettings.m_SupportedBackends;
1773  ReportError(failureMsg.str(), messages);
1774  throw InvalidArgumentException(failureMsg.str());
1775  }
1776 
1777  // Create a map to temporarily hold initialized backend objects
1778  TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
1779  BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
1780 
1781  // Assign an available backend to each layer
1782  Graph::Iterator firstLayer = optGraph.begin();
1783  Graph::Iterator lastLayer = optGraph.end();
1784  OptimizationResult assignBackendsResult = AssignBackends(optNetObjPtr->pOptimizedNetworkImpl.get(),
1785  backendSettings,
1786  firstLayer,
1787  lastLayer,
1788  messages);
1789  if (assignBackendsResult.m_Error)
1790  {
1791  // Failed to assign a backend to each layer
1792  throw InvalidArgumentException("Failed to assign a backend to each layer");
1793  }
1794 
1795  Optimizer::Pass(optGraph, MakeOptimizations(OptimizeInverseConversionsFp16(),
1796  OptimizeInverseConversionsFp32()));
1797 
1798  // Apply the backend-specific optimizations
1799  OptimizationResult backendOptimizationResult = ApplyBackendOptimizations(optNetObjPtr->pOptimizedNetworkImpl.get(),
1800  backendSettings,
1801  backends,
1802  options.m_ModelOptions,
1803  messages);
1804  if (backendOptimizationResult.m_Error)
1805  {
1806  // Failed to apply the backend-specific optimizations
1807  throw InvalidArgumentException("Failed to apply the backend-specific optimizations");
1808  }
1809 
1810  // If the debug flag is set, then insert a DebugLayer after each layer
1811  // Doing this after applying the backend optimizations as they might have changed some layers
1812  if (options.m_Debug)
1813  {
1814  Optimizer::Pass(optGraph, MakeOptimizations(InsertDebugLayer()));
1815  }
1816 
1817  // Calculate the compatibility strategies for tensor handles
1818  OptimizationResult strategyResult = SelectTensorHandleStrategy(optGraph,
1819  backends,
1820  tensorHandleFactoryRegistry,
1821  options.m_ImportEnabled,
1822  messages);
1823  if (strategyResult.m_Error)
1824  {
1825  // Failed to apply the backend-specific optimizations
1826  return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
1827  }
1828 
1829  // Based on the tensor handle strategy determined above, insert copy layers where required.
1830  {
1831  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_AddCompatibilityLayers");
1832  optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry);
1833  }
1834 
1835  // Convert constants
1836  {
1837  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ConvertConstants");
1838  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
1839  Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
1840 
1841  // Once the constants are converted we can now safely call RedirectMembersToConstantInputs
1842  Optimizer::Pass(optGraph, MakeOptimizations(RedirectMembersToConstantInputs()));
1843  }
1844  return optNet;
1845 }
OptimizeForConnection< Layer, PermuteLayer, SquashEqualSiblingsImpl< PermuteLayer > > SquashEqualPermuteSiblings
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
OptimizeForConnection< PermuteLayer, PermuteLayer, OptimizeInversePermutesImpl< PermuteLayer > > OptimizeInversePermutes
OptimizeForExclusiveConnection< PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl > FoldPadIntoConvolution2d
Optimizer::Optimizations MakeOptimizations(Args &&... args)
Definition: Optimizer.hpp:43
OptimizeForConnection< TransposeLayer, TransposeLayer, OptimizeInversePermutesImpl< TransposeLayer > > OptimizeInverseTransposes
OptimizeForExclusiveConnection< PadLayer, DepthwiseConvolution2dLayer, pad_fold::FoldPadIntoDepthwiseConvolution2dImpl > FoldPadIntoDepthwiseConvolution2d
OptimizeForConnection< TransposeLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl< TransposeLayer > > TransposeAndBatchToSpaceAsDepthToSpace
OptimizeForExclusiveConnection< DepthwiseConvolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< DepthwiseConvolution2dLayer, armnn::DataType::Float32 > > FuseBatchNormIntoDepthwiseConvolution2DFloat32
OptimizeForExclusiveConnection< DepthwiseConvolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< DepthwiseConvolution2dLayer, armnn::DataType::Float16 > > FuseBatchNormIntoDepthwiseConvolution2DFloat16
OptimizeForType< Layer, RedirectMembersToConstantInputsImpl > RedirectMembersToConstantInputs
OptimizeForExclusiveConnection< Convolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< Convolution2dLayer, armnn::DataType::Float16 > > FuseBatchNormIntoConvolution2DFloat16
OptimizeForExclusiveConnection< Convolution2dLayer, BatchNormalizationLayer, FuseBatchNorm< Convolution2dLayer, armnn::DataType::Float32 > > FuseBatchNormIntoConvolution2DFloat32
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205
OptimizeForConnection< Layer, ReshapeLayer, SquashEqualSiblingsImpl< ReshapeLayer > > SquashEqualReshapeSiblings
OptimizeForConnection< Layer, TransposeLayer, MoveTransposeUpImpl > MoveTransposeUp
OptimizeForType< Layer, AddDebugImpl > InsertDebugLayer
Definition: AddDebug.hpp:34
OptimizeForConnection< ReshapeLayer, ReshapeLayer, OptimizeConsecutiveReshapesImpl > OptimizeConsecutiveReshapes
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
OptimizeForConnection< ConvertFp16ToFp32Layer, ConvertFp32ToFp16Layer, OptimizeInverseConversionsImpl > OptimizeInverseConversionsFp16
OptimizeForConnection< ConstantLayer, PermuteLayer, ConvertConstPermuteLayersToConstLayers > FusePermuteIntoConstLayer
OptimizeForConnection< PermuteLayer, BatchToSpaceNdLayer, PermuteAndBatchToSpaceAsDepthToSpaceImpl< PermuteLayer > > PermuteAndBatchToSpaceAsDepthToSpace
OptimizeForConnection< Layer, PermuteLayer, MovePermuteUpImpl > MovePermuteUp
ConvertConstants< Float32ToFloat16, IsFloat16Layer > ConvertConstantsFloatToHalf
OptimizeForType< TransposeLayer, TransposeAsReshapeImpl > TransposeAsReshape
OptimizationResult ApplyBackendOptimizations(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, BackendsMap &backends, const ModelOptions &modelOptions, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1139
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:242
OptimizeForType< PermuteLayer, PermuteAsReshapeImpl > PermuteAsReshape
OptimizeForConnection< Layer, TransposeLayer, SquashEqualSiblingsImpl< TransposeLayer > > SquashEqualTransposeSiblings
ConvertConstants< Float16ToFloat32, IsFloat32Layer > ConvertConstantsHalfToFloat
BackendsMap CreateSupportedBackends(TensorHandleFactoryRegistry &handleFactoryRegistry, BackendSettings &backendSettings)
Definition: Network.cpp:1120
OptimizeForConnection< ConvertFp32ToFp16Layer, ConvertFp16ToFp32Layer, OptimizeInverseConversionsImpl > OptimizeInverseConversionsFp32
OptimizeForExclusiveConnection< PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl > FoldPadIntoPooling2d
OptimizeForType< Layer, ConvertFp32NetworkToFp16Impl > Fp32NetworkToFp16Converter
OptimizationResult SelectTensorHandleStrategy(Graph &optGraph, BackendsMap &backends, TensorHandleFactoryRegistry &registry, bool importEnabled, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1601
OptimizeForType< Layer, AddBroadcastReshapeLayerImpl > AddBroadcastReshapeLayer
OptimizeForConnection< ConstantLayer, DequantizeLayer, ConvertConstDequantisationLayersToConstLayersImpl > ConvertConstDequantisationLayersToConstLayers
OptimizeForType< Layer, ConvertFp32NetworkToBf16Impl > Fp32NetworkToBf16Converter
OptimizationResult AssignBackends(OptimizedNetworkImpl *optNetObjPtr, BackendSettings &backendSettings, SubgraphView &subgraph, Optional< std::vector< std::string > &> errMessages)
Definition: Network.cpp:1106
std::map< BackendId, std::unique_ptr< class IBackendInternal > > BackendsMap
Definition: Network.hpp:294
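The flags consulted above all live on OptimizerOptions. A sketch of setting them explicitly before calling either Optimize() overload; the values are examples, not recommendations.

armnn::OptimizerOptions options;
options.m_ReduceFp32ToFp16     = true;    // enables the Fp32 -> Fp16 conversion passes
options.m_ReduceFp32ToBf16     = false;   // must not be enabled together with the Fp16 reduction
options.m_Debug                = false;   // true inserts a DebugLayer after every layer
options.m_ImportEnabled        = false;   // feeds into SelectTensorHandleStrategy()
options.m_ProfilingEnabled     = true;    // registered with the ProfilerManager at the top of Optimize()
options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly;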

◆ Pad()

void Pad ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const ITensorHandle *  inputHandle,
ITensorHandle *  outputHandle,
const PadQueueDescriptor &  data 
)

Definition at line 39 of file Pad.cpp.

References Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), PadDescriptor::m_PadList, PadDescriptor::m_PadValue, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, ITensorHandle::Map(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

44 {
45  auto padList = data.m_Parameters.m_PadList;
46  auto padValue = data.m_Parameters.m_PadValue;
47 
48  unsigned int numOutputElements = outputInfo.GetNumElements();
49 
50  TensorShape outputShape = outputInfo.GetShape();
51  TensorShape inputShape = inputInfo.GetShape();
52 
53  unsigned int numInputDimensions = inputShape.GetNumDimensions();
54 
55 #ifndef NDEBUG
56 
57  unsigned int numOutputDimensions = outputShape.GetNumDimensions();
58  assert(numInputDimensions == numOutputDimensions);
59 
60 #endif
61 
62  unsigned int inputBatches = 0;
63  unsigned int inputChannels = 0;
64  unsigned int inputHeight = 0;
65  unsigned int inputWidth = 0;
66 
67  unsigned int outputChannels = 0;
68  unsigned int outputHeight = 0;
69  unsigned int outputWidth = 0;
70 
71  auto inputData = MakeDecoder<float>(inputInfo, inputHandle->Map());
72  auto outData = MakeEncoder<float>(outputInfo, outputHandle->Map());
73 
74  // Fill the output tensor with Pad value first
75  if (outputInfo.IsQuantized())
76  {
77  // For Quantized types Pad Value should not be quantized with scale and offset of the tensor info
78  auto temporaryInfo = TensorInfo(outputInfo.GetShape(), outputInfo.GetDataType(), 1.0f, 0);
79  auto outputData = MakeEncoder<float>(temporaryInfo, outputHandle->Map());
80  FillOutputWithPadValue(*outputData, padValue, numOutputElements);
81  }
82  else
83  {
84  FillOutputWithPadValue(*outData, padValue, numOutputElements);
85  }
86 
87  Decoder<float>& input = *inputData;
88  Encoder<float>& output = *outData;
89 
90  switch(numInputDimensions) {
91 
92  case 1:
93  inputWidth = inputShape[0];
94  for (unsigned int w = 0; w < inputWidth ; w++)
95  {
96  input[w];
97  auto inputValue = input.Get();
98  auto outputIndex = w + std::get<0>(padList[0]);
99  output[outputIndex];
100  output.Set(inputValue);
101  }
102 
103  break;
104  case 2 :
105  inputHeight = inputShape[0];
106  inputWidth = inputShape[1];
107  outputWidth = outputShape[1];
108 
109  for (unsigned int h = 0; h < inputHeight; h++)
110  {
111  for (unsigned int w = 0; w < inputWidth ; w++)
112  {
113  input[h * inputWidth + w];
114  auto inputValue = input.Get();
115  auto outputIndex = (h + std::get<0>(padList[0])) * outputWidth + (w + std::get<0>(padList[1]));
116  output[outputIndex];
117  output.Set(inputValue);
118  }
119  }
120 
121  break;
122  case 3 :
123  inputChannels = inputShape[0];
124  inputHeight = inputShape[1];
125  inputWidth = inputShape[2];
126  outputHeight = outputShape[1];
127  outputWidth = outputShape[2];
128 
129  for (unsigned int c = 0; c < inputChannels; c++)
130  {
131  for (unsigned int h = 0; h < inputHeight; h++)
132  {
133  for (unsigned int w = 0; w < inputWidth ; w++)
134  {
135  input[c * inputHeight * inputWidth + h * inputWidth + w];
136  auto inputValue = input.Get();
137  auto outputIndex = (c + std::get<0>(padList[0])) * outputHeight * outputWidth
138  + (h + std::get<0>(padList[1])) * outputWidth
139  + (w + std::get<0>(padList[2]));
140  output[outputIndex];
141  output.Set(inputValue);
142  }
143  }
144  }
145 
146  break;
147  case 4 :
148  inputBatches = inputShape[0];
149  inputChannels = inputShape[1];
150  inputHeight = inputShape[2];
151  inputWidth = inputShape[3];
152  outputChannels = outputShape[1];
153  outputHeight = outputShape[2];
154  outputWidth = outputShape[3];
155 
156  for (unsigned int b = 0; b < inputBatches; b++)
157  {
158  for (unsigned int c = 0; c < inputChannels; c++)
159  {
160  for (unsigned int h = 0; h < inputHeight; h++)
161  {
162  for (unsigned int w = 0; w < inputWidth ; w++)
163  {
164  input[b * inputChannels * inputHeight * inputWidth
165  + c * inputHeight * inputWidth
166  + h * inputWidth
167  + w];
168  auto inputValue = input.Get();
169  auto outputIndex = (b + std::get<0>(padList[0]))
170  * outputChannels * outputHeight * outputWidth
171  + (c + std::get<0>(padList[1])) * outputHeight * outputWidth
172  + (h + std::get<0>(padList[2])) * outputWidth
173  + (w + std::get<0>(padList[3]));
174  output[outputIndex];
175  output.Set(inputValue);
176  }
177  }
178  }
179  }
180 
181  break;
182  default :
183  break;
184  }
185 }
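The loops above shift each copied element by the leading pad of every dimension (std::get<0> of each pair in m_PadList) after pre-filling the output with m_PadValue. A small descriptor sketch for the 2D case, with illustrative values:

armnn::PadDescriptor padDesc;
padDesc.m_PadList  = { {1, 1}, {2, 2} };   // (padBefore, padAfter) per dimension
padDesc.m_PadValue = 0.0f;                 // used to pre-fill the output tensor
// A [2, 3] input padded this way produces a [4, 7] output, with the input block
// starting at row 1, column 2.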

◆ ParseBoolean()

bool armnn::ParseBoolean ( const BackendOptions::Var &  value,
bool  defaultValue 
)

Definition at line 97 of file ClBackendContext.cpp.

References BackendOptions::Var::AsBool(), and BackendOptions::Var::IsBool().

98 {
99  if (value.IsBool())
100  {
101  return value.AsBool();
102  }
103  return defaultValue;
104 }

◆ ParseBooleanBackendOption()

bool armnn::ParseBooleanBackendOption ( const armnn::BackendOptions::Var &  value,
bool  defaultValue 
)
inline

Definition at line 312 of file BackendOptions.hpp.

References BackendOptions::Var::AsBool(), and BackendOptions::Var::IsBool().

313 {
314  if (value.IsBool())
315  {
316  return value.AsBool();
317  }
318  return defaultValue;
319 }
bool AsBool() const
Value getters.
bool IsBool() const
Type getters.

◆ ParseComputeDevice()

constexpr armnn::Compute armnn::ParseComputeDevice ( const char *  str)

Deprecated function that will be removed together with the Compute enum.

Definition at line 182 of file TypesUtils.hpp.

References CpuAcc, CpuRef, GpuAcc, StrEqual(), and Undefined.

Referenced by operator>>().

183 {
184  if (armnn::StrEqual(str, "CpuAcc"))
185  {
186  return armnn::Compute::CpuAcc;
187  }
188  else if (armnn::StrEqual(str, "CpuRef"))
189  {
190  return armnn::Compute::CpuRef;
191  }
192  else if (armnn::StrEqual(str, "GpuAcc"))
193  {
194  return armnn::Compute::GpuAcc;
195  }
196  else
197  {
198  return armnn::Compute::Undefined;
199  }
200 }
CPU Execution: Reference C++ kernels.
constexpr bool StrEqual(const char *strA, const char(&strB)[N])
Definition: TypesUtils.hpp:170
GPU Execution: OpenCL: ArmCompute.
CPU Execution: NEON: ArmCompute.

◆ ParseFile()

std::string armnn::ParseFile ( const BackendOptions::Var &  value,
std::string  defaultValue 
)

Definition at line 106 of file ClBackendContext.cpp.

References BackendOptions::Var::AsString(), and BackendOptions::Var::IsString().

Referenced by ClBackendContext::ClBackendContext(), and ClBackendModelContext::ClBackendModelContext().

107 {
108  if (value.IsString())
109  {
110  return value.AsString();
111  }
112  return defaultValue;
113 }

◆ ParseIntBackendOption()

int armnn::ParseIntBackendOption ( const armnn::BackendOptions::Var &  value,
int  defaultValue 
)
inline

Definition at line 330 of file BackendOptions.hpp.

References BackendOptions::Var::AsInt(), and BackendOptions::Var::IsInt().

Referenced by ClBackendModelContext::ClBackendModelContext().

331 {
332  if (value.IsInt())
333  {
334  return value.AsInt();
335  }
336  return defaultValue;
337 }

◆ ParseOptions()

void armnn::ParseOptions ( const std::vector< BackendOptions > &  options,
BackendId  backend,
f 
)

Definition at line 297 of file BackendOptions.hpp.

References BackendOptions::BackendOption::GetName(), and BackendOptions::BackendOption::GetValue().

Referenced by ClBackendContext::ClBackendContext(), ClBackendModelContext::ClBackendModelContext(), NeonBackendModelContext::NeonBackendModelContext(), Optimize(), and RuntimeImpl::RuntimeImpl().

298 {
299  for (auto optionsGroup : options)
300  {
301  if (optionsGroup.GetBackendId() == backend)
302  {
303  for (size_t i=0; i < optionsGroup.GetOptionCount(); i++)
304  {
305  const BackendOptions::BackendOption option = optionsGroup.GetOption(i);
306  f(option.GetName(), option.GetValue());
307  }
308  }
309  }
310 }
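A usage sketch modelled on how ClBackendContext consumes GpuAcc options; "TuningLevel" and "KernelProfilingEnabled" are real GpuAcc backend options, while the handler body is illustrative.

std::vector<armnn::BackendOptions> backendOptions;
armnn::BackendOptions gpuAcc("GpuAcc");
gpuAcc.AddOption({"TuningLevel", 2});
gpuAcc.AddOption({"KernelProfilingEnabled", true});
backendOptions.push_back(gpuAcc);

armnn::ParseOptions(backendOptions, "GpuAcc",
    [](const std::string& name, const armnn::BackendOptions::Var& value)
    {
        if (name == "KernelProfilingEnabled")
        {
            bool enabled = armnn::ParseBooleanBackendOption(value, false);
            // ... act on 'enabled' ...
        }
    });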

◆ ParseStringBackendOption()

std::string armnn::ParseStringBackendOption ( const armnn::BackendOptions::Var &  value,
std::string  defaultValue 
)
inline

Definition at line 321 of file BackendOptions.hpp.

References BackendOptions::Var::AsString(), and BackendOptions::Var::IsString().

322 {
323  if (value.IsString())
324  {
325  return value.AsString();
326  }
327  return defaultValue;
328 }
std::string AsString() const

◆ ParseTuningLevel()

TuningLevel armnn::ParseTuningLevel ( const BackendOptions::Var &  value,
TuningLevel  defaultValue 
)

Definition at line 79 of file ClBackendContext.cpp.

References ARMNN_LOG, BackendOptions::Var::AsInt(), Exhaustive, BackendOptions::Var::IsInt(), None, and warning.

Referenced by ClBackendContext::ClBackendContext().

80 {
81  if (value.IsInt())
82  {
83  int v = value.AsInt();
84  if (v > static_cast<int>(TuningLevel::Exhaustive) ||
85  v < static_cast<int>(TuningLevel::None))
86  {
87  ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. "
88  "Using default(" << static_cast<int>(defaultValue) << ")";
89  } else
90  {
91  return static_cast<TuningLevel>(v);
92  }
93  }
94  return defaultValue;
95 }
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205

◆ PermuteTensor()

armnn::ConstTensor PermuteTensor ( const ConstTensorHandle *  tensor,
const PermutationVector &  permutationVector,
void *  permuteBuffer 
)

Definition at line 18 of file WorkloadUtils.cpp.

References ARMNN_ASSERT_MSG, ConstTensorHandle::GetConstTensor(), TensorInfo::GetDataType(), GetDataTypeSize(), TensorInfo::GetNumBytes(), TensorInfo::GetShape(), PermutationVector::GetSize(), ConstTensorHandle::GetTensorInfo(), Permute, armnnUtils::Permuted(), and TensorInfo::SetConstant().

Referenced by Convert1HWOTensorToAcl(), Convert1HWOtoMIHW(), ConvertWeightTensorFromArmnnToAcl(), and GatherTensorHandlePairs().

20 {
21  ARMNN_ASSERT_MSG(tensor, "Invalid input tensor");
22  ARMNN_ASSERT_MSG(permuteBuffer, "Invalid permute buffer");
23 
24  TensorInfo tensorInfo = tensor->GetTensorInfo();
25 
26  if (permutationVector.GetSize() > 0)
27  {
28  tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector);
29  armnnUtils::Permute(tensorInfo.GetShape(), permutationVector,
30  tensor->GetConstTensor<void>(), permuteBuffer,
31  GetDataTypeSize(tensorInfo.GetDataType()));
32  }
33  else
34  {
35  ::memcpy(permuteBuffer, tensor->GetConstTensor<void>(), tensorInfo.GetNumBytes());
36  }
37  tensorInfo.SetConstant(true);
38  return ConstTensor(tensorInfo, permuteBuffer);
39 }
void Permute(const armnn::TensorShape &dstShape, const armnn::PermutationVector &mappings, const void *src, void *dst, size_t dataTypeSize)
Definition: Permute.cpp:131
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:98
constexpr unsigned int GetDataTypeSize(DataType dataType)
Definition: TypesUtils.hpp:151

◆ PolymorphicDowncast()

DestType armnn::PolymorphicDowncast ( SourceType *  value)

Polymorphic downcast for built-in pointers only.

Usage: Child* pChild = PolymorphicDowncast<Child*>(pBase);

Template Parameters
    DestType      Pointer type to the target object (Child pointer type)
    SourceType    Pointer type to the source object (Base pointer type)
Parameters
    value         Pointer to the source object
Returns
    Pointer of type DestType (pointer of type Child)

Definition at line 74 of file PolymorphicDowncast.hpp.

References ARMNN_POLYMORPHIC_CAST_CHECK.

Referenced by ClLayerSupport::IsLayerSupported(), and NeonLayerSupport::IsLayerSupported().

75 {
76  static_assert(std::is_pointer<DestType>::value,
77  "PolymorphicDowncast only works with pointer types.");
78 
79  ARMNN_POLYMORPHIC_CAST_CHECK(dynamic_cast<DestType>(value) == value);
80  return static_cast<DestType>(value);
81 }
#define ARMNN_POLYMORPHIC_CAST_CHECK(cond)
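
An illustrative sketch using hypothetical Base/Child types (not part of Arm NN), mirroring the usage line above:

    struct Base  { virtual ~Base() = default; };
    struct Child : Base { void ChildOnly() {} };

    Base* base = new Child();
    Child* child = armnn::PolymorphicDowncast<Child*>(base); // checked with dynamic_cast when the cast check is enabled
    child->ChildOnly();
    delete base;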

◆ PolymorphicPointerDowncast()

auto armnn::PolymorphicPointerDowncast ( const SourceType &  value)

Polymorphic downcast for shared pointers and built-in pointers.

Usage: auto pChild = PolymorphicPointerDowncast<Child>(pBase)

Template Parameters
    DestType      Type of the target object (Child type)
    SourceType    Pointer type to the source object (Base (shared) pointer type)
Parameters
    value         Pointer to the source object
Returns
    Pointer of type DestType ((shared) pointer of type Child)

Definition at line 93 of file PolymorphicDowncast.hpp.

References ARMNN_POLYMORPHIC_CAST_CHECK.

94 {
95  ARMNN_POLYMORPHIC_CAST_CHECK(utility::DynamicPointerCast<DestType>(value)
96  == value);
97  return utility::StaticPointerCast<DestType>(value);
98 }
#define ARMNN_POLYMORPHIC_CAST_CHECK(cond)
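
The shared-pointer counterpart, using the same hypothetical Base/Child types as the previous sketch; note the template argument is the target type itself, not a pointer type:

    std::shared_ptr<Base> base = std::make_shared<Child>();
    std::shared_ptr<Child> child = armnn::PolymorphicPointerDowncast<Child>(base);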

◆ Pooling2d()

void Pooling2d ( Decoder< float > &  rInputDecoder,
Encoder< float > &  rOutputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const Pooling2dDescriptor &  params 
)

Computes the Pooling2d operation.

Definition at line 142 of file Pooling2d.cpp.

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDataLayout(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Pooling2dDescriptor::m_DataLayout, Pooling2dDescriptor::m_PadBottom, Pooling2dDescriptor::m_PaddingMethod, Pooling2dDescriptor::m_PadLeft, Pooling2dDescriptor::m_PadRight, Pooling2dDescriptor::m_PadTop, Pooling2dDescriptor::m_PoolHeight, Pooling2dDescriptor::m_PoolType, Pooling2dDescriptor::m_PoolWidth, Pooling2dDescriptor::m_StrideX, Pooling2dDescriptor::m_StrideY, NHWC, numeric_cast(), Pooling2d(), and Encoder< IType >::Set().

Referenced by Pooling2d(), Pooling2dLayer::Pooling2dLayer(), and TEST_SUITE().

147 {
148  const DataLayoutIndexed dataLayout(params.m_DataLayout);
149  auto channelsIndex = dataLayout.GetChannelsIndex();
150  auto heightIndex = dataLayout.GetHeightIndex();
151  auto widthIndex = dataLayout.GetWidthIndex();
152 
153  const int batchSize = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
154  const int channels = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
155  const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
156  const int widthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
157  const int heightInput = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
158  const int widthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
159  const int padLeft = armnn::numeric_cast<int>(params.m_PadLeft);
160  const int padRight = armnn::numeric_cast<int>(params.m_PadRight);
161  const int padTop = armnn::numeric_cast<int>(params.m_PadTop);
162  const int padBottom = armnn::numeric_cast<int>(params.m_PadBottom);
163  const int strideX = armnn::numeric_cast<int>(params.m_StrideX);
164  const int strideY = armnn::numeric_cast<int>(params.m_StrideY);
165  const int poolHeight = armnn::numeric_cast<int>(params.m_PoolHeight);
166  const int poolWidth = armnn::numeric_cast<int>(params.m_PoolWidth);
167 
168  float defaultInitializer = DefaultInitializer(params.m_PoolType);
169 
170  Accumulator accumulate = GetAccumulator(params.m_PoolType);
171  Executor execute = GetExecutor(params.m_PoolType);
172 
173  // Check supported padding methods outside the loop to simplify
174  // the inner loop.
175  if (params.m_PaddingMethod != PaddingMethod::Exclude &&
176  params.m_PaddingMethod != PaddingMethod::IgnoreValue)
177  {
178  throw armnn::InvalidArgumentException("Unsupported padding type");
179  }
180 
181  const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
182 
183  for (int n = 0; n < batchSize; n++)
184  {
185  for (int c = 0; c < channels; c++)
186  {
187  for (int yOutput = 0; yOutput < heightOutput; yOutput++)
188  {
189  // Calculate values independent of the x axis
190  int hstart = (yOutput * strideY) - padTop;
191  int hend = hstart + poolHeight;
192  // Clamp the pooling region inside the valid input area (which includes the padding).
193  // This is necessary because the final pooling in a row may overlap beyond the padding.
194  hend = std::min(hend, heightInput + padBottom);
195 
196  int height = hend - hstart;
197  bool hclamped = ClampRange(hstart, hend, heightInput);
198 
199  for (int xOutput = 0; xOutput < widthOutput; xOutput++)
200  {
201  int wstart = (xOutput * strideX) - padLeft;
202  int wend = wstart + poolWidth;
203 
204  // Clamp the pooling region inside the valid input area (which includes the padding).
205  // This is necessary because the final pooling in a row may overlap beyond the padding.
206  wend = std::min(wend, widthInput + padRight);
207 
208  float result = defaultInitializer;
209  float poolAreaSize = armnn::numeric_cast<float>(height * (wend - wstart));
210 
211  // Special case: when the pooling kernel is over a padding region and the padding
212  // size is larger or equal to the kernel and the kernel only covers
213  // padding and no real values, then we initialize the result as zero
214  // by convention. This is because we need to choose a value here and
215  // all values we have are padding, which we ignore.
216  if (OnPaddingOnly(hstart, hend, heightInput) ||
217  OnPaddingOnly(wstart, wend, widthInput))
218  {
219  result = 0.0f;
220 
221  int outputIndex;
222 
223  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
224  {
225  outputIndex = n * heightOutput * widthOutput * channels +
226  yOutput * widthOutput * channels +
227  xOutput * channels +
228  c;
229  }
230  else
231  {
232  outputIndex = n * heightOutput * widthOutput * channels +
233  c * heightOutput * widthOutput +
234  yOutput * widthOutput +
235  xOutput;
236  }
237 
238  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
239  rOutputEncoder.Set(result);
240  continue;
241  }
242 
243  bool clamped = hclamped |= ClampRange(wstart, wend, widthInput);
244 
245  if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
246  {
247  // When we exclude the padding, it means we calculate with a smaller
248  // kernel size, so I changed the divisor here.
249  poolAreaSize = armnn::numeric_cast<float>((hend - hstart) * (wend - wstart));
250  }
251 
252  for (auto yInput = hstart; yInput < hend; yInput++)
253  {
254  for (auto xInput = wstart; xInput < wend; xInput++)
255  {
256 
257  int inputIndex;
258  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
259  {
260  inputIndex = n * heightInput * widthInput * channels +
261  yInput * widthInput * channels +
262  xInput * channels +
263  c;
264 
265  }
266  else
267  {
268  inputIndex = n * heightInput * widthInput * channels +
269  c * heightInput * widthInput +
270  yInput * widthInput +
271  xInput;
272  }
273 
274  accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
275  }
276  }
277 
278  execute(result, poolAreaSize);
279 
280  int outputIndex;
281 
282  if(dataLayout.GetDataLayout() == DataLayout::NHWC)
283  {
284  outputIndex = n * heightOutput * widthOutput * channels +
285  yOutput * widthOutput * channels +
286  xOutput * channels +
287  c;
288  }
289  else
290  {
291  outputIndex = n * heightOutput * widthOutput * channels +
292  c * heightOutput * widthOutput +
293  yOutput * widthOutput +
294  xOutput;
295  }
296 
297  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
298  rOutputEncoder.Set(result);
299  }
300  }
301  }
302  }
303 }
uint32_t m_PadBottom
Padding bottom value in the height dimension.
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_PoolWidth
Pooling width value.
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
PaddingMethod m_PaddingMethod
The padding method to be used. (Exclude, IgnoreValue).
uint32_t m_PadTop
Padding top value in the height dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_PoolHeight
Pooling height value.
uint32_t m_PadRight
Padding right value in the width dimension.
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max, Average, L2).
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
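
The Decoder/Encoder arguments are supplied by the reference backend's workload, so a full call is not reproduced here; the sketch below only shows how a Pooling2dDescriptor consumed by this kernel might be configured (a 2x2 max pool with stride 2 over NHWC data, values chosen for illustration):

    armnn::Pooling2dDescriptor desc;
    desc.m_PoolType      = armnn::PoolingAlgorithm::Max;
    desc.m_PoolWidth     = 2;
    desc.m_PoolHeight    = 2;
    desc.m_StrideX       = 2;
    desc.m_StrideY       = 2;
    desc.m_PaddingMethod = armnn::PaddingMethod::Exclude;
    desc.m_DataLayout    = armnn::DataLayout::NHWC;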

◆ Pooling3d()

void Pooling3d ( Decoder< float > &  rInputDecoder,
Encoder< float > &  rOutputEncoder,
const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
const Pooling3dDescriptor &  params 
)

Computes the Pooling3d operation.

Definition at line 172 of file Pooling3d.cpp.

References Decoder< IType >::DecodeTensor(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetDepthIndex(), DataLayoutIndexed::GetHeightIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), Pooling3dDescriptor::m_DataLayout, Pooling3dDescriptor::m_PadBack, Pooling3dDescriptor::m_PadBottom, Pooling3dDescriptor::m_PaddingMethod, Pooling3dDescriptor::m_PadFront, Pooling3dDescriptor::m_PadLeft, Pooling3dDescriptor::m_PadRight, Pooling3dDescriptor::m_PadTop, Pooling3dDescriptor::m_PoolDepth, Pooling3dDescriptor::m_PoolHeight, Pooling3dDescriptor::m_PoolType, Pooling3dDescriptor::m_PoolWidth, Pooling3dDescriptor::m_StrideX, Pooling3dDescriptor::m_StrideY, Pooling3dDescriptor::m_StrideZ, numeric_cast(), Pooling3d(), and Encoder< IType >::Set().

Referenced by Pooling3d(), and Pooling3dLayer::Pooling3dLayer().

177 {
178  const DataLayoutIndexed dataLayout(params.m_DataLayout);
179 
180  auto channelsIndex = dataLayout.GetChannelsIndex();
181 
182  auto depthIndex = dataLayout.GetDepthIndex();
183  auto heightIndex = dataLayout.GetHeightIndex();
184  auto widthIndex = dataLayout.GetWidthIndex();
185 
186  const int batchSize = armnn::numeric_cast<int>(outputInfo.GetShape()[0]);
187  const int channels = armnn::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
188 
189  const int depthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[depthIndex]);
190  const int heightOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[heightIndex]);
191  const int widthOutput = armnn::numeric_cast<int>(outputInfo.GetShape()[widthIndex]);
192 
193  const int depthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[depthIndex]);
194  const int heightInput = armnn::numeric_cast<int>(inputInfo.GetShape()[heightIndex]);
195  const int widthInput = armnn::numeric_cast<int>(inputInfo.GetShape()[widthIndex]);
196 
197  const int padLeft = armnn::numeric_cast<int>(params.m_PadLeft);
198  const int padRight = armnn::numeric_cast<int>(params.m_PadRight);
199  const int padTop = armnn::numeric_cast<int>(params.m_PadTop);
200  const int padBottom = armnn::numeric_cast<int>(params.m_PadBottom);
201  const int padFront = armnn::numeric_cast<int>(params.m_PadFront);
202  const int padBack = armnn::numeric_cast<int>(params.m_PadBack);
203 
204  const int strideX = armnn::numeric_cast<int>(params.m_StrideX);
205  const int strideY = armnn::numeric_cast<int>(params.m_StrideY);
206  const int strideZ = armnn::numeric_cast<int>(params.m_StrideZ);
207 
208  const int poolHeight = armnn::numeric_cast<int>(params.m_PoolHeight);
209  const int poolWidth = armnn::numeric_cast<int>(params.m_PoolWidth);
210  const int poolDepth = armnn::numeric_cast<int>(params.m_PoolDepth);
211 
212  float defaultInitializer = DefaultInitializer(params.m_PoolType);
213  Accumulator accumulate = GetAccumulator(params.m_PoolType);
214  Executor execute = GetExecutor(params.m_PoolType);
215 
216  // Check supported padding methods outside the loop to simplify
217  // the inner loop.
218  if (params.m_PaddingMethod != PaddingMethod::Exclude &&
219  params.m_PaddingMethod != PaddingMethod::IgnoreValue)
220  {
221  throw armnn::InvalidArgumentException("Unsupported padding type");
222  }
223 
224  const std::vector<float> decodedInputVec = rInputDecoder.DecodeTensor(inputInfo.GetShape());
225 
226  for (int n = 0; n < batchSize; n++)
227  {
228  for (int c = 0; c < channels; c++)
229  {
230  for (int zOutput = 0; zOutput < depthOutput; zOutput++)
231  {
232  // Calculate values independent of the x and y axis
233  int dstart = (zOutput * strideZ) - padFront;
234  int dend = dstart + poolDepth;
235  // Clamp the pooling region inside the valid input area (which includes the padding).
236  // This is necessary because the final pooling in a row may overlap beyond the padding.
237  dend = std::min(dend, depthInput + padBack);
238 
239  int depth = dend - dstart;
240  bool dclamped = ClampRange(dstart, dend, depthInput);
241  int depthClamped = dend - dstart;
242 
243  for (int yOutput = 0; yOutput < heightOutput; yOutput++)
244  {
245  int hstart = (yOutput * strideY) - padTop;
246  int hend = hstart + poolHeight;
247  // Clamp the pooling region inside the valid input area (which includes the padding).
248  // This is necessary because the final pooling in a row may overlap beyond the padding.
249  hend = std::min(hend, heightInput + padBottom);
250 
251  int height = hend - hstart;
252  bool hclamped = ClampRange(hstart, hend, heightInput);
253  int heightClamped = hend - hstart;
254 
255  for (int xOutput = 0; xOutput < widthOutput; xOutput++)
256  {
257  int wstart = (xOutput * strideX) - padLeft;
258  int wend = wstart + poolWidth;
259  // Clamp the pooling region inside the valid input area (which includes the padding).
260  // This is necessary because the final pooling in a row may overlap beyond the padding.
261  wend = std::min(wend, widthInput + padRight);
262 
263  int width = wend - wstart;
264  bool wclamped = ClampRange(wstart, wend, widthInput);
265  int widthClamped = wend - wstart;
266 
267  float result = defaultInitializer;
268  float poolAreaSize = armnn::numeric_cast<float>(depth * height * width);
269 
270  // Special case: when the pooling kernel is over a padding region and the padding
271  // size is larger or equal to the kernel and the kernel only covers
272  // padding and no real values, then we initialize the result as zero
273  // by convention. This is because we need to choose a value here and
274  // all values we have are padding, which we ignore.
275  if (OnPaddingOnly(dstart, dend, depthInput) ||
276  OnPaddingOnly(hstart, hend, heightInput) ||
277  OnPaddingOnly(wstart, wend, widthInput))
278  {
279  result = 0.0f;
280 
281  int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
282  n, c, zOutput, yOutput, xOutput, dataLayout);
283 
284  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
285  rOutputEncoder.Set(result);
286 
287  continue;
288  }
289 
290  bool clamped = (dclamped | hclamped | wclamped);
291 
292  if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude)
293  {
294  // When we exclude the padding, it means we calculate with a smaller
295  // kernel size, so I changed the divisor here.
296  poolAreaSize = armnn::numeric_cast<float>(depthClamped * heightClamped * widthClamped);
297  }
298 
299  for (auto zInput = dstart; zInput < dend; zInput++)
300  {
301  for (auto yInput = hstart; yInput < hend; yInput++)
302  {
303  for (auto xInput = wstart; xInput < wend; xInput++)
304  {
305 
306  int inputIndex = CalculateIndex(channels, depthInput, heightInput, widthInput,
307  n, c, zInput, yInput, xInput, dataLayout);
308 
309  accumulate(result, decodedInputVec[static_cast<unsigned int>(inputIndex)]);
310  }
311  }
312  }
313 
314  execute(result, poolAreaSize);
315 
316  int outputIndex = CalculateIndex(channels, depthOutput, heightOutput, widthOutput,
317  n, c, zOutput, yOutput, xOutput, dataLayout);
318 
319  rOutputEncoder[static_cast<unsigned int>(outputIndex)];
320  rOutputEncoder.Set(result);
321  }
322  }
323  }
324  }
325  }
326 }
PoolingAlgorithm m_PoolType
The pooling algorithm to use (Max, Average, L2).
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
uint32_t m_PoolWidth
Pooling width value.
uint32_t m_PoolDepth
Pooling depth value.
virtual std::vector< float > DecodeTensor(const TensorShape &tensorShape, bool isDepthwise=false)=0
virtual void Set(IType right)=0
uint32_t m_PadRight
Padding right value in the width dimension.
DataLayout m_DataLayout
The data layout to be used (NCDHW, NDHWC).
uint32_t m_PadFront
Padding front value in the depth dimension.
uint32_t m_PoolHeight
Pooling height value.
uint32_t m_PadBack
Padding back value in the depth dimension.
Provides access to the appropriate indexes for Channels, Height and Width based on DataLayout...
uint32_t m_PadBottom
Padding bottom value in the height dimension.
uint32_t m_StrideZ
Stride value when proceeding through input for the depth dimension.
uint32_t m_PadLeft
Padding left value in the width dimension.
uint32_t m_PadTop
Padding top value in the height dimension.
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
PaddingMethod m_PaddingMethod
The padding method to be used. (Exclude, IgnoreValue).

◆ PreluImpl()

void PreluImpl ( const TensorInfo &  inputInfo,
const TensorInfo &  alphaInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  inputData,
Decoder< float > &  alphaData,
Encoder< float > &  outputData 
)

Definition at line 13 of file PreluImpl.cpp.

References TensorInfo::GetShape(), and BroadcastLoop::Unroll().

Referenced by RefPreluWorkload::ExecuteAsync().

19 {
20  const TensorShape& inputShape = inputInfo.GetShape();
21  const TensorShape& alphaShape = alphaInfo.GetShape();
22  const TensorShape& outputShape = outputInfo.GetShape();
23 
24  // PReLU activation: f(x) = alpha * x for x < 0, f(x) = x for x >= 0
25  auto prelu = [](float x, float alpha)
26  {
27  return x < 0 ? alpha * x : x;
28  };
29 
30  BroadcastLoop(inputShape, alphaShape, outputShape).Unroll(prelu, 0, inputData, alphaData, outputData);
31 }

◆ ProfilingUpdateDescriptions()

void armnn::ProfilingUpdateDescriptions ( const std::string &  name,
const DescriptorType &  desc,
const WorkloadInfo &  infos,
const arm::pipe::ProfilingGuid  guid 
)
inline

Adds layer details to the profiler, if profiling is enabled.

Definition at line 180 of file Profiling.hpp.

References ProfilerManager::GetInstance(), and IProfiler::IsProfilingEnabled().

184 {
185  IProfiler* profiler(ProfilerManager::GetInstance().GetProfiler()); ///< Profiler used
186  if (profiler && profiler->IsProfilingEnabled())
187  {
188  profiler->AddLayerDetails(name, desc, infos, guid);
189  }
190 }

◆ Quantize() [1/2]

void armnn::Quantize ( uint8_t *  quant,
const float *  dequant,
const TensorInfo &  info 
)
inline

Definition at line 114 of file RefWorkloadUtils.hpp.

References TensorInfo::GetNumElements(), TensorInfo::GetQuantizationOffset(), and TensorInfo::GetQuantizationScale().

115 {
116  for (size_t i = 0; i < info.GetNumElements(); i++)
117  {
118  quant[i] = armnn::Quantize<uint8_t>(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset());
119  }
120 }

◆ Quantize() [2/2]

template int32_t Quantize< int32_t > ( float  value,
float  scale,
int32_t  offset 
)

Quantize a floating point value into a quantized integer data type.

Explicit specialization of Quantize for int32_t.

Explicit specialization of Quantize for int16_t.

Explicit specialization of Quantize for uint8_t.

Explicit specialization of Quantize for int8_t.

Parameters
    value     The value to quantize.
    scale     The scale (must be non-zero).
    offset    The offset.
Returns
    The quantized value, calculated as round(value/scale)+offset.

Definition at line 30 of file TypesUtils.cpp.

References ARMNN_ASSERT.

Referenced by TEST_SUITE().

31 {
32  static_assert(IsQuantizedType<QuantizedType>(), "Not an integer type.");
33  constexpr QuantizedType max = std::numeric_limits<QuantizedType>::max();
34  constexpr QuantizedType min = std::numeric_limits<QuantizedType>::lowest();
35  ARMNN_ASSERT(scale != 0.f);
36  ARMNN_ASSERT(!std::isnan(value));
37 
38  float clampedValue = std::min(std::max((static_cast<float>(offset) + static_cast<float>(round(value/scale))),
39  static_cast<float>(min)), static_cast<float>(max));
40  auto quantizedBits = static_cast<QuantizedType>(clampedValue);
41 
42  return quantizedBits;
43 }
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
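
A short sketch of the formula above; the results in the comments follow round(value/scale)+offset, clamped to the target type's range (values chosen for illustration):

    uint8_t q1 = armnn::Quantize<uint8_t>(0.5f,   0.25f, 10); // round(0.5 / 0.25) + 10 = 12
    uint8_t q2 = armnn::Quantize<uint8_t>(100.0f, 0.25f, 0);  // 400 clamps to 255, the uint8_t maximum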

◆ Reduce()

void Reduce ( const TensorInfo &  inputInfo,
const TensorInfo &  outputInfo,
Decoder< float > &  input,
Encoder< float > &  output,
const std::vector< uint32_t >  axis,
const ReduceOperation  reduceOperation 
)

Definition at line 70 of file Reduce.cpp.

References ARMNN_ASSERT, Decoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), Max, Mean, Min, NextIndex(), numeric_cast(), Prod, ReducedOutputOffset(), Encoder< IType >::Set(), and Sum.

76 {
77  armnn::TensorShape inputDims = inputInfo.GetShape();
78  unsigned int inputNumDims = inputInfo.GetNumDimensions();
79  unsigned int numOutputs = outputInfo.GetNumElements();
80 
81  // Initialise temp output
82  std::vector<float> tempOut(numOutputs);
83  switch(reduceOperation)
84  {
85  case ReduceOperation::Mean:
86  case ReduceOperation::Sum:
87  std::fill(tempOut.begin(), tempOut.end(), 0.0f);
88  break;
89  case ReduceOperation::Prod:
90  std::fill(tempOut.begin(), tempOut.end(), 1.0f);
91  break;
92  case ReduceOperation::Max:
93  std::fill(tempOut.begin(), tempOut.end(), -1 * std::numeric_limits<float>::max());
94  break;
95  case ReduceOperation::Min:
96  std::fill(tempOut.begin(), tempOut.end(), std::numeric_limits<float>::max());
97  break;
98  default:
99  throw armnn::InvalidArgumentException("Unknown reduce method: " +
100  std::to_string(static_cast<int>(reduceOperation)));
101  }
102 
103  // Initialise temp index
104  std::vector<unsigned int> tempIndex(inputNumDims, 0);
105 
106  std::vector<unsigned int> resolvedAxis = axis;
107  if (resolvedAxis.empty())
108  {
109  for (unsigned int idx = 0; idx < inputNumDims; ++idx)
110  {
111  resolvedAxis.push_back(idx);
112  }
113  }
114  auto numResolvedAxis = armnn::numeric_cast<unsigned int>(resolvedAxis.size());
115 
116  // Iterates through input_data and operates over the reduced axis
117  for (bool hasNext = true; hasNext; hasNext = NextIndex(inputNumDims, inputDims, tempIndex))
118  {
119  unsigned int inputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, 0, {});
120  unsigned int outputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex,
121  numResolvedAxis, resolvedAxis);
122  input[inputOffset];
123  auto inputValue = input.Get();
124  switch(reduceOperation)
125  {
126  case ReduceOperation::Mean:
127  case ReduceOperation::Sum:
128  tempOut[outputOffset] += inputValue;
129  break;
130  case ReduceOperation::Prod:
131  tempOut[outputOffset] *= inputValue;
132  break;
133  case ReduceOperation::Max:
134  if (inputValue > tempOut[outputOffset])
135  {
136  tempOut[outputOffset] = inputValue;
137  }
138  break;
139  case ReduceOperation::Min:
140  if (inputValue < tempOut[outputOffset])
141  {
142  tempOut[outputOffset] = inputValue;
143  }
144  break;
145  default:
146  throw armnn::InvalidArgumentException("Unknown reduce method: " +
147  std::to_string(static_cast<int>(reduceOperation)));
148  }
149  }
150 
151  // Takes average by num of elements added to get MEAN
152  size_t numElementsInAxis = 1;
153  for (unsigned int idx = 0; idx < numResolvedAxis; ++idx)
154  {
155  unsigned int current = inputDims[resolvedAxis[idx]];
156  ARMNN_ASSERT(armnn::numeric_cast<float>(current) <
157  (std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis)));
158  numElementsInAxis *= current;
159  }
160 
161  for (unsigned int idx = 0; idx < numOutputs; ++idx)
162  {
163  output[idx];
164  if (reduceOperation == ReduceOperation::Mean)
165  {
166  if (numElementsInAxis > 0)
167  {
168  output.Set(tempOut[idx] / armnn::numeric_cast<float>(numElementsInAxis));
169  }
170  }
171  else
172  {
173  output.Set(tempOut[idx]);
174  }
175  }
176 }
bool NextIndex(const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &current)
Definition: Reduce.cpp:19
unsigned int ReducedOutputOffset(const unsigned int numDims, const armnn::TensorShape &dims, std::vector< unsigned int > &index, const unsigned int numAxis, const std::vector< unsigned int > &axis)
Definition: Reduce.cpp:40
virtual void Set(IType right)=0
virtual IType Get() const =0
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
std::enable_if_t< std::is_unsigned< Source >::value &&std::is_unsigned< Dest >::value, Dest > numeric_cast(Source source)
Definition: NumericCast.hpp:35
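
As with the other reference kernels, the Decoder/Encoder arguments come from the workload; the sketch below only shows a ReduceDescriptor a ReduceLayer might carry for a mean reduction over axis 1 (field names from the public descriptor, values illustrative):

    armnn::ReduceDescriptor desc;
    desc.m_ReduceOperation = armnn::ReduceOperation::Mean;
    desc.m_vAxis           = { 1 };  // reduce over the second dimension
    desc.m_KeepDims        = true;   // keep the reduced dimension with size 1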

◆ ReducedOutputOffset()

unsigned int armnn::ReducedOutputOffset ( const unsigned int  numDims,
const armnn::TensorShape &  dims,
std::vector< unsigned int > &  index,
const unsigned int  numAxis,
const std::vector< unsigned int > &  axis 
)

Definition at line 40 of file Reduce.cpp.

Referenced by Reduce().

45 {
46  unsigned int offset = 0;
47  for (unsigned int idx = 0; idx < numDims; ++idx)
48  {
49  bool isAxis = false;
50  if (!axis.empty())
51  {
52  for (unsigned int axisIdx = 0; axisIdx < numAxis; ++axisIdx)
53  {
54  if (idx == axis[axisIdx])
55  {
56  isAxis = true;
57  break;
58  }
59  }
60  }
61  if (!isAxis)
62  {
63  offset = offset * dims[idx] + index[idx];
64  }
65  }
66  return offset;
67 }

◆ RefBackendId()

constexpr const char* armnn::RefBackendId ( )

Definition at line 10 of file RefBackendId.hpp.

Referenced by RefBackend::GetIdStatic().

10 { return "CpuRef"; }

◆ RefTensorHandleFactoryId()

constexpr const char* armnn::RefTensorHandleFactoryId ( )

Definition at line 15 of file RefTensorHandleFactory.hpp.

Referenced by RefTensorHandleFactory::GetIdStatic().

15 { return "Arm/Ref/TensorHandleFactory"; }

◆ ReorderWeightChannelsForAcl()

ConstTensor armnn::ReorderWeightChannelsForAcl ( const ConstTensor &  weightHandle,
DataLayout  dataLayout,
void *  permuteBuffer 
)

Definition at line 67 of file WorkloadUtils.cpp.

References BaseTensor< MemoryType >::GetInfo(), TensorInfo::GetNumBytes(), BaseTensor< MemoryType >::GetShape(), NCHW, and NHWC.

68 {
69  DataType* weight = static_cast<DataType*>(permuteBuffer);
70  const TensorShape& weightShape = weightHandle.GetShape();
71  unsigned int multiplier;
72  unsigned int height;
73  unsigned int width;
74  unsigned int inputChannels;
75  switch (dataLayout)
76  {
77  case DataLayout::NHWC: //It actually is [ H, W, I, M ]
78  height = weightShape[0];
79  width = weightShape[1];
80  inputChannels = weightShape[2];
81  multiplier = weightShape[3];
82  break;
83  case DataLayout::NCHW: //It actually is [ M, I, H, W ]
84  default:
85  height = weightShape[2];
86  width = weightShape[3];
87  inputChannels = weightShape[1];
88  multiplier = weightShape[0];
89  break;
90  }
91 
92  std::vector<DataType> weightAclOrder(height*width*inputChannels*multiplier);
93  unsigned int destinationWeightsChannel;
94  unsigned int totalChannels = inputChannels * multiplier;
95  unsigned int channelSize = height * width;
96  unsigned int inputChannel = 0;
97 
98  for (unsigned int originWeightsChannel = 0; originWeightsChannel < totalChannels; originWeightsChannel++)
99  {
100  inputChannel = originWeightsChannel % inputChannels;
101  destinationWeightsChannel = (originWeightsChannel - inputChannel) / inputChannels + multiplier * inputChannel;
102 
103  for (unsigned int i = 0; i < channelSize; i++)
104  {
105  weightAclOrder[i + destinationWeightsChannel * channelSize] =
106  weight[i + originWeightsChannel * channelSize];
107  }
108  }
109 
110  ::memcpy(permuteBuffer, weightAclOrder.data(), weightHandle.GetInfo().GetNumBytes());
111  return ConstTensor(weightHandle.GetInfo(), permuteBuffer);
112 }
DataType
Definition: Types.hpp:48

◆ ReplaceLayers()

void armnn::ReplaceLayers ( OptimizationViews &  optimizationViews,
LayerType *  baseLayer,
std::vector< IConnectableLayer *> &  layers 
)

Definition at line 364 of file ArmComputeSubgraphUtils.hpp.

References OptimizationViews::AddSubstitution().

367 {
368  std::list<IConnectableLayer*> replacementLayers(layers.begin(), layers.end());
369 
370  SubgraphView substitutionSubgraph(baseLayer);
371  SubgraphView replacementSubgraph(std::move(replacementLayers),
372  CreateIInputsFrom({replacementLayers.front()}),
373  CreateIOutputsFrom({replacementLayers.back()}));
374 
375  optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
376 }

◆ ReportError()

void armnn::ReportError ( const std::string &  errorMessage,
Optional< std::vector< std::string > &>  errorMessages 
)

Definition at line 556 of file Network.cpp.

References ARMNN_LOG, and warning.

Referenced by AssignBackends(), CheckScaleSetOnQuantizedType(), Optimize(), and ReturnWithError().

558 {
559  std::stringstream fullErrorMessage;
560  fullErrorMessage << "ERROR: " << errorMessage;
561  ARMNN_LOG(warning) << fullErrorMessage.str();
562  if (errorMessages)
563  {
564  errorMessages.value().push_back(fullErrorMessage.str());
565  }
566 }
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205

◆ ReportUntouchedLayers()

void armnn::ReportUntouchedLayers ( OptimizationViews &  optimizationViews,
std::map< LayerGuid, Layer *>  untouched 
)
inline

Definition at line 82 of file ArmComputeSubgraphUtils.hpp.

References OptimizationViews::AddUntouchedSubgraph().

Referenced by NeonBackend::OptimizeSubgraphView(), and ClBackend::OptimizeSubgraphView().

83 {
84  std::vector<Layer*> untouchedVector;
85  for (const auto& pair : untouched)
86  {
87  Layer* layer = pair.second;
88  SubgraphView subgraphView({layer},
89  CreateIInputsFrom({layer}),
90  CreateIOutputsFrom({layer}));
91  optimizationViews.AddUntouchedSubgraph(std::move(subgraphView));
92  }
93 }

◆ ReportWarning()

void armnn::ReportWarning ( const std::string &  warningMessage,
Optional< std::vector< std::string > &>  warningMessages 
)

Definition at line 568 of file Network.cpp.

References ARMNN_LOG, and warning.

Referenced by ApplyBackendOptimizations(), and AttemptBackendAssignment().

570 {
571  std::stringstream fullWarningMessage;
572  fullWarningMessage << "WARNING: " << warningMessage;
573  ARMNN_LOG(warning) << fullWarningMessage.str();
574  if (warningMessages)
575  {
576  warningMessages.value().push_back(fullWarningMessage.str());
577  }
578 }
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205

◆ RequiresCopy()

bool armnn::RequiresCopy ( ITensorHandleFactory::FactoryId  src,
ITensorHandleFactory::FactoryId  dst,
TensorHandleFactoryRegistry &  registry 
)

Definition at line 1247 of file Network.cpp.

References ITensorHandleFactory::GetExportFlags(), TensorHandleFactoryRegistry::GetFactory(), and ITensorHandleFactory::GetImportFlags().

Referenced by CalculateSlotOption().

1250 {
1251  if (src != dst)
1252  {
1253  ITensorHandleFactory* srcFactory = registry.GetFactory(src);
1254  ITensorHandleFactory* dstFactory = registry.GetFactory(dst);
1255 
1256  if (srcFactory && dstFactory &&
1257  (srcFactory->GetExportFlags() & dstFactory->GetImportFlags()) != 0)
1258  {
1259  return false;
1260  }
1261  return true;
1262  }
1263  return false;
1264 }

◆ ReshapeWeightsForAcl()

void ReshapeWeightsForAcl ( TensorInfo &  weightInfo,
DataLayout  dataLayout 
)

Definition at line 41 of file WorkloadUtils.cpp.

References TensorInfo::GetShape(), NCHW, NHWC, and TensorInfo::SetShape().

Referenced by ConvertWeightTensorFromArmnnToAcl(), ConvertWeightTensorInfoFromArmnnToAcl(), and GatherTensorHandlePairs().

42 {
43  // Reshape the weights in-place
44  const TensorShape& weightShape = weightInfo.GetShape();
45  switch (dataLayout)
46  {
47  case DataLayout::NHWC:
48  // The data layout is NHWC, reshape from [ H, W, I, M ] to [ 1, H, W, I * M ]
49  weightInfo.SetShape({ 1,
50  weightShape[0],
51  weightShape[1],
52  weightShape[2] * weightShape[3] });
53  weightInfo.SetShape({ 1,
54  weightShape[0] * weightShape[1],
55  weightShape[2],
56  weightShape[3] });
57  break;
58  case DataLayout::NCHW:
59  default:
60  // The data layout is NCHW, reshape from [ M, I, H, W ] to [ 1, I * M, H, W, ]
61  weightInfo.SetShape({ 1, weightShape[0] * weightShape[1], weightShape[2], weightShape[3] });
62  break;
63  }
64 }
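
A small sketch of the in-place reshape for the NCHW branch (shape values chosen for illustration): depthwise weights of shape [ M, I, H, W ] = [ 2, 3, 5, 5 ] become [ 1, I * M, H, W ] = [ 1, 6, 5, 5 ].

    armnn::TensorInfo weightInfo({ 2, 3, 5, 5 }, armnn::DataType::Float32);
    armnn::ReshapeWeightsForAcl(weightInfo, armnn::DataLayout::NCHW);
    // weightInfo.GetShape() is now { 1, 6, 5, 5 }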

◆ Resize()

void Resize ( Decoder< float > &  in,
const TensorInfo &  inputInfo,
Encoder< float > &  out,
const TensorInfo &  outputInfo,
DataLayoutIndexed  dataLayout,
armnn::ResizeMethod  resizeMethod,
bool  alignCorners,
bool  halfPixelCenters 
)

Definition at line 65 of file Resize.cpp.

References ARMNN_ASSERT, Bilinear, Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), NearestNeighbor, Resize(), and Encoder< IType >::Set().

Referenced by InferenceTestImage::GetSizeInBytes(), Resize(), ResizeLayer::ResizeLayer(), and TEST_SUITE().

73 {
74  // alignCorners and halfPixelCenters cannot both be true
75  ARMNN_ASSERT(!(alignCorners && halfPixelCenters));
76 
77  // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output
78  // image is projected into the input image to figure out the interpolants and weights. Note that this
79  // will yield different results than if projecting the centre of output texels.
80 
81  const unsigned int batchSize = inputInfo.GetShape()[0];
82  const unsigned int channelCount = inputInfo.GetShape()[dataLayout.GetChannelsIndex()];
83 
84  const unsigned int inputHeight = inputInfo.GetShape()[dataLayout.GetHeightIndex()];
85  const unsigned int inputWidth = inputInfo.GetShape()[dataLayout.GetWidthIndex()];
86  const unsigned int outputHeight = outputInfo.GetShape()[dataLayout.GetHeightIndex()];
87  const unsigned int outputWidth = outputInfo.GetShape()[dataLayout.GetWidthIndex()];
88 
89  // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates
90  // in the input image.
91  const float scaleY = CalculateResizeScale(inputHeight, outputHeight, alignCorners);
92  const float scaleX = CalculateResizeScale(inputWidth, outputWidth, alignCorners);
93 
94  TensorShape inputShape = inputInfo.GetShape();
95  TensorShape outputShape = outputInfo.GetShape();
96 
97  for (unsigned int n = 0; n < batchSize; ++n)
98  {
99  for (unsigned int c = 0; c < channelCount; ++c)
100  {
101  for (unsigned int y = 0; y < outputHeight; ++y)
102  {
103  // Corresponding real-valued height coordinate in input image.
104  float iy = PixelScaler(y, scaleY, halfPixelCenters, resizeMethod);
105 
106  // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation).
107  const float fiy = (resizeMethod == armnn::ResizeMethod::NearestNeighbor && alignCorners) ?
108  roundf(iy) : floorf(iy);
109  // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
110  const unsigned int y0 = static_cast<unsigned int>(std::max(fiy, 0.0f));
111 
112  // Interpolation weight (range [0,1]).
113  const float yw = iy - fiy;
114 
115  for (unsigned int x = 0; x < outputWidth; ++x)
116  {
117  // Real-valued and discrete width coordinates in input image.
118  float ix = PixelScaler(x, scaleX, halfPixelCenters, resizeMethod);
119 
120  // Nearest Neighbour uses rounding to align to corners
121  const float fix = resizeMethod == armnn::ResizeMethod::NearestNeighbor && alignCorners ?
122  roundf(ix) : floorf(ix);
123  // Pixel scaling a value with Half Pixel Centers can be negative, if so set to 0
124  const unsigned int x0 = static_cast<unsigned int>(std::max(fix, 0.0f));
125 
126  // Interpolation weight (range [0,1]).
127  const float xw = ix - fix;
128 
129  unsigned int x1;
130  unsigned int y1;
131  // Half Pixel Centers uses the scaling to compute a weighted parameter for nearby pixels
132  if (halfPixelCenters)
133  {
134  x1 = std::min(static_cast<unsigned int>(std::ceil(ix)), inputWidth - 1u);
135  y1 = std::min(static_cast<unsigned int>(std::ceil(iy)), inputHeight - 1u);
136  }
137  // Discrete width/height coordinates of texels below and to the right of (x0, y0).
138  else
139  {
140  x1 = std::min(x0 + 1, inputWidth - 1u);
141  y1 = std::min(y0 + 1, inputHeight - 1u);
142  }
143 
144  float interpolatedValue;
145  switch (resizeMethod)
146  {
147  case armnn::ResizeMethod::Bilinear:
148  {
149  in[dataLayout.GetIndex(inputShape, n, c, y0, x0)];
150  float input1 = in.Get();
151  in[dataLayout.GetIndex(inputShape, n, c, y0, x1)];
152  float input2 = in.Get();
153  in[dataLayout.GetIndex(inputShape, n, c, y1, x0)];
154  float input3 = in.Get();
155  in[dataLayout.GetIndex(inputShape, n, c, y1, x1)];
156  float input4 = in.Get();
157 
158  const float ly0 = Lerp(input1, input2, xw); // lerp along row y0.
159  const float ly1 = Lerp(input3, input4, xw); // lerp along row y1.
160  interpolatedValue = Lerp(ly0, ly1, yw);
161  break;
162  }
163  case armnn::ResizeMethod::NearestNeighbor:
164  {
165  // calculate euclidean distance to the 4 neighbours
166  auto distance00 = EuclideanDistance(fix, fiy, x0, y0);
167  auto distance01 = EuclideanDistance(fix, fiy, x0, y1);
168  auto distance10 = EuclideanDistance(fix, fiy, x1, y0);
169  auto distance11 = EuclideanDistance(fix, fiy, x1, y1);
170 
171  auto minimum = std::min( { distance00, distance01, distance10, distance11 } );
172 
173  unsigned int xNearest = 0;
174  unsigned int yNearest = 0;
175 
176  if (minimum == distance00)
177  {
178  xNearest = x0;
179  yNearest = y0;
180  }
181  else if (minimum == distance01)
182  {
183  xNearest = x0;
184  yNearest = y1;
185  }
186  else if (minimum == distance10)
187  {
188  xNearest = x1;
189  yNearest = y0;
190  }
191  else if (minimum == distance11)
192  {
193  xNearest = x1;
194  yNearest = y1;
195  }
196  else
197  {
198  throw armnn::InvalidArgumentException("Resize Nearest Neighbor failure");
199  }
200 
201  in[dataLayout.GetIndex(inputShape, n, c, yNearest, xNearest)];
202  interpolatedValue = in.Get();
203  break;
204  }
205  default:
206  throw armnn::InvalidArgumentException("Unknown resize method: " +
207  std::to_string(static_cast<int>(resizeMethod)));
208  }
209  out[dataLayout.GetIndex(outputShape, n, c, y, x)];
210  out.Set(interpolatedValue);
211  }
212  }
213  }
214  }
215 }
unsigned int GetWidthIndex() const
const TensorShape & GetShape() const
Definition: Tensor.hpp:191
virtual void Set(IType right)=0
unsigned int GetHeightIndex() const
virtual IType Get() const =0
unsigned int GetIndex(const armnn::TensorShape &shape, unsigned int batchIndex, unsigned int channelIndex, unsigned int heightIndex, unsigned int widthIndex) const
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
unsigned int GetChannelsIndex() const
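
The alignCorners and halfPixelCenters arguments mirror the corresponding ResizeDescriptor fields; a configuration sketch for a bilinear resize to 224x224 (values chosen for illustration):

    armnn::ResizeDescriptor desc;
    desc.m_Method           = armnn::ResizeMethod::Bilinear;
    desc.m_TargetWidth      = 224;
    desc.m_TargetHeight     = 224;
    desc.m_DataLayout       = armnn::DataLayout::NHWC;
    desc.m_AlignCorners     = false;   // cannot be combined with half-pixel centres
    desc.m_HalfPixelCenters = false;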

◆ ReturnWithError()

OptimizationResult armnn::ReturnWithError ( OptimizationResult  res,
const Layer *  layer,
const BackendSettings &  backendSettings,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 580 of file Network.cpp.

References GetLayerTypeAsCString(), Layer::GetType(), OptimizationResult::m_Error, BackendSettings::m_PreferredBackends, and ReportError().

Referenced by AssignBackendsIConnectable(), and AttemptBackendAssignment().

584 {
585  std::stringstream failureMsg;
586  failureMsg << "Layer of type " << GetLayerTypeAsCString(layer->GetType())
587  << " is not supported on any preferred backend " << backendSettings.m_PreferredBackends;
588  ReportError(failureMsg.str(), errMessages);
589 
590  res.m_Error = true;
591  return res;
592 }
void ReportError(const std::string &errorMessage, Optional< std::vector< std::string > &> errorMessages)
Definition: Network.cpp:556
const char * GetLayerTypeAsCString(LayerType type)

◆ RunClFunction()

void armnn::RunClFunction ( arm_compute::IFunction &  function,
const CheckLocation &  location 
)
inline

Definition at line 155 of file ClWorkloadUtils.hpp.

References Error, error, and WrapClError().

Referenced by ClFillWorkload::Execute(), ClPadWorkload::Execute(), ClAdditionWorkload::Execute(), ClSubtractionWorkload::Execute(), ClActivationWorkload::Execute(), ClExpWorkload::Execute(), ClPreluWorkload::Execute(), ClQuantizeWorkload::Execute(), ClConvertFp16ToFp32Workload::Execute(), ClRsqrtWorkload::Execute(), ClSinWorkload::Execute(), ClConvertFp32ToFp16Workload::Execute(), ClAbsWorkload::Execute(), ClLogWorkload::Execute(), ClSqrtWorkload::Execute(), ClLstmFloatWorkload::Execute(), ClCastWorkload::Execute(), ClNegWorkload::Execute(), ClSpaceToDepthWorkload::Execute(), ClNormalizationFloatWorkload::Execute(), ClFloorFloatWorkload::Execute(), ClResizeWorkload::Execute(), ClReshapeWorkload::Execute(), ClGatherWorkload::Execute(), ClInstanceNormalizationWorkload::Execute(), ClBatchToSpaceNdWorkload::Execute(), ClMaximumWorkload::Execute(), ClMinimumWorkload::Execute(), ClArgMinMaxWorkload::Execute(), ClChannelShuffleWorkload::Execute(), ClComparisonWorkload::Execute(), ClSliceWorkload::Execute(), ClL2NormalizationFloatWorkload::Execute(), ClDepthToSpaceWorkload::Execute(), ClDivisionWorkload::Execute(), ClPooling2dWorkload::Execute(), ClStridedSliceWorkload::Execute(), ClGatherNdWorkload::Execute(), ClSpaceToBatchNdWorkload::Execute(), ClPooling3dWorkload::Execute(), ClMultiplicationWorkload::Execute(), ClLogSoftmaxWorkload::Execute(), ClQuantizedLstmWorkload::Execute(), ClSoftmaxWorkload::Execute(), ClBatchNormalizationFloatWorkload::Execute(), ClDepthwiseConvolutionWorkload::Execute(), ClFullyConnectedWorkload::Execute(), ClConvolution3dWorkload::Execute(), ClTransposeWorkload::Execute(), ClTransposeConvolution2dWorkload::Execute(), ClPermuteWorkload::Execute(), and ClConvolution2dWorkload::Execute().

156 {
157  try
158  {
159  function.run();
160  }
161  catch (cl::Error& error)
162  {
163  throw WrapClError(error, location);
164  }
165 }
RuntimeException WrapClError(const cl::Error &clError, const CheckLocation &location)

◆ RuntimeLoadedNetworksReserve()

void RuntimeLoadedNetworksReserve ( armnn::RuntimeImpl *  runtime)

Definition at line 36 of file RuntimeTests.cpp.

Referenced by TEST_SUITE().

37 {
38  runtime->m_LoadedNetworks.reserve(1);
39 }

◆ SelectTensorHandleStrategy()

OptimizationResult SelectTensorHandleStrategy ( Graph &  optGraph,
BackendsMap &  backends,
TensorHandleFactoryRegistry &  registry,
bool  importEnabled,
Optional< std::vector< std::string > &>  errMessages 
)

Definition at line 1601 of file Network.cpp.

References ARMNN_ASSERT, ARMNN_SCOPED_PROFILING_EVENT, CalculateEdgeStrategy(), CalculateSlotOption(), CalculateSlotOptionForInput(), CalculateSlotOptionForOutput(), Graph::ForEachLayer(), Layer::GetBackendId(), OutputSlot::GetConnections(), Layer::GetNumOutputSlots(), Layer::GetOutputSlot(), Layer::GetType(), Input, ITensorHandleFactory::LegacyFactoryId, OptimizationResult::m_Error, Output, OutputSlot::SetEdgeStrategy(), OutputSlot::SetTensorHandleFactory(), and Undefined.

Referenced by Optimize(), and TEST_SUITE().

1606 {
1607  ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_SelectTensorHandleStrategy");
1608  OptimizationResult result;
1609 
1610  optGraph.ForEachLayer([&backends, &registry, &result, &errMessages, importEnabled](Layer* layer)
1611  {
1612  ARMNN_ASSERT(layer);
1613 
1614  // Lets make sure the backend is in our list of supported backends. Something went wrong during backend
1615  // assignment if this check fails
1616  ARMNN_ASSERT(backends.find(layer->GetBackendId()) != backends.end());
1617 
1618  // Check each output separately
1619  for (unsigned int slotIdx = 0; slotIdx < layer->GetNumOutputSlots(); slotIdx++)
1620  {
1621  OutputSlot& outputSlot = layer->GetOutputSlot(slotIdx);
1622 
1623  ITensorHandleFactory::FactoryId slotOption = ITensorHandleFactory::LegacyFactoryId;
1624 
1625  // Calculate the factory to use which results in the fewest copies being made.
1626  switch(layer->GetType())
1627  {
1628  case LayerType::Input:
1629  slotOption = CalculateSlotOptionForInput(backends, outputSlot, registry, importEnabled);
1630  break;
1631  case LayerType::Output:
1632  slotOption = CalculateSlotOptionForOutput(backends, outputSlot, registry);
1633  break;
1634  default:
1635  slotOption = CalculateSlotOption(backends, outputSlot, registry, importEnabled);
1636  break;
1637  }
1638  outputSlot.SetTensorHandleFactory(slotOption);
1639 
1640  // Now determine the "best" edge strategy for each connection given the slotOption.
1641  unsigned int connectionIdx = 0;
1642  for (auto&& connection : outputSlot.GetConnections())
1643  {
1644  const Layer& connectedLayer = connection->GetOwningLayer();
1645 
1646  EdgeStrategy strategy = CalculateEdgeStrategy(backends, slotOption, *layer, connectedLayer,
1647  registry, importEnabled);
1648 
1649  if (strategy == EdgeStrategy::Undefined)
1650  {
1651  result.m_Error = true;
1652  if (errMessages)
1653  {
1654  errMessages.value().emplace_back("Could not find valid strategy required for compatibility"
1655  " between backends.");
1656  }
1657  return;
1658  }
1659 
1660  outputSlot.SetEdgeStrategy(connectionIdx, strategy);
1661 
1662  connectionIdx++;
1663  }
1664  }
1665  });
1666 
1667  return result;
1668 }
ITensorHandleFactory::FactoryId CalculateSlotOptionForOutput(BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry)
Definition: Network.cpp:1352
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220
ITensorHandleFactory::FactoryId FactoryId
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14
ITensorHandleFactory::FactoryId CalculateSlotOption(BackendsMap &backends, OutputSlot &outputSlot, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition: Network.cpp:1362
EdgeStrategy CalculateEdgeStrategy(BackendsMap &backends, ITensorHandleFactory::FactoryId srcFactoryId, const Layer &layer, const Layer &connectedLayer, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition: Network.cpp:1512
ITensorHandleFactory::FactoryId CalculateSlotOptionForInput(BackendsMap &backends, OutputSlot &slot, TensorHandleFactoryRegistry &registry, bool importEnabled)
Definition: Network.cpp:1267

◆ SetAllLoggingSinks()

void SetAllLoggingSinks ( bool  standardOut,
bool  debugOut,
bool  coloured 
)

Definition at line 191 of file Logging.cpp.

Referenced by SimpleLogger< Level >::AddSink(), ConfigureLogging(), main(), and TEST_SUITE().

192 {
193  SetLoggingSinks<LogSeverity::Trace>(standardOut, debugOut, coloured);
194  SetLoggingSinks<LogSeverity::Debug>(standardOut, debugOut, coloured);
195  SetLoggingSinks<LogSeverity::Info>(standardOut, debugOut, coloured);
196  SetLoggingSinks<LogSeverity::Warning>(standardOut, debugOut, coloured);
197  SetLoggingSinks<LogSeverity::Error>(standardOut, debugOut, coloured);
198  SetLoggingSinks<LogSeverity::Fatal>(standardOut, debugOut, coloured);
199 }
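
In applications these sink and filter helpers are normally driven through armnn::ConfigureLogging rather than called directly; a minimal sketch:

    // Log warnings and above to standard output, without the debug output sink.
    armnn::ConfigureLogging(true /*standardOut*/, false /*debugOut*/, armnn::LogSeverity::Warning);
    ARMNN_LOG(warning) << "This message passes the Warning filter";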

◆ SetClSliceData()

auto armnn::SetClSliceData ( const std::vector< unsigned int > &  m_begin,
const std::vector< unsigned int > &  m_size 
)
inline

Definition at line 91 of file ClWorkloadUtils.hpp.

Referenced by ClSliceWorkload::ClSliceWorkload().

93 {
94  // This function must translate the size vector given to an end vector
95  // expected by the ACL NESlice workload
 96  arm_compute::Coordinates starts;
 97  arm_compute::Coordinates ends;
 98 
99  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
100 
101  // For strided slices, we have the relationship size = (end - begin) / stride
102  // For slice, we assume stride to be a vector of all ones, yielding the formula
103  // size = (end - begin) therefore we know end = size + begin
104  for (unsigned int i = 0; i < num_dims; i++)
105  {
106  unsigned int revertedIndex = num_dims - i - 1;
107 
108  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
109  ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
110  }
111 
112  return std::make_tuple(starts, ends);
113 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates

◆ SetClStridedSliceData()

auto armnn::SetClStridedSliceData ( const std::vector< int > &  m_begin,
const std::vector< int > &  m_end,
const std::vector< int > &  m_stride 
)
inline

Definition at line 70 of file ClWorkloadUtils.hpp.

Referenced by ClStridedSliceWorkload::ClStridedSliceWorkload().

73 {
 74  arm_compute::Coordinates starts;
 75  arm_compute::Coordinates ends;
 76  arm_compute::Coordinates strides;
 77 
78  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
79 
80  for (unsigned int i = 0; i < num_dims; i++) {
81  unsigned int revertedIndex = num_dims - i - 1;
82 
83  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
84  ends.set(i, static_cast<int>(m_end[revertedIndex]));
85  strides.set(i, static_cast<int>(m_stride[revertedIndex]));
86  }
87 
88  return std::make_tuple(starts, ends, strides);
89 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates

◆ SetLogFilter()

void SetLogFilter ( LogSeverity  level)

Definition at line 73 of file Logging.cpp.

References ARMNN_ASSERT, ARMNN_FALLTHROUGH, Debug, SimpleLogger< Level >::Enable(), Error, Fatal, SimpleLogger< Level >::Get(), IgnoreUnused(), Info, Trace, and Warning.

Referenced by SimpleLogger< Level >::AddSink(), ConfigureLogging(), main(), and TEST_SUITE().

74 {
75  SimpleLogger<LogSeverity::Trace>::Get().Enable(false);
76  SimpleLogger<LogSeverity::Debug>::Get().Enable(false);
77  SimpleLogger<LogSeverity::Info>::Get().Enable(false);
78  SimpleLogger<LogSeverity::Warning>::Get().Enable(false);
79  SimpleLogger<LogSeverity::Error>::Get().Enable(false);
80  SimpleLogger<LogSeverity::Fatal>::Get().Enable(false);
81  switch (level)
82  {
83  case LogSeverity::Trace:
 84  SimpleLogger<LogSeverity::Trace>::Get().Enable(true);
 85  ARMNN_FALLTHROUGH;
 86  case LogSeverity::Debug:
 87  SimpleLogger<LogSeverity::Debug>::Get().Enable(true);
 88  ARMNN_FALLTHROUGH;
 89  case LogSeverity::Info:
 90  SimpleLogger<LogSeverity::Info>::Get().Enable(true);
 91  ARMNN_FALLTHROUGH;
 92  case LogSeverity::Warning:
 93  SimpleLogger<LogSeverity::Warning>::Get().Enable(true);
 94  ARMNN_FALLTHROUGH;
 95  case LogSeverity::Error:
 96  SimpleLogger<LogSeverity::Error>::Get().Enable(true);
 97  ARMNN_FALLTHROUGH;
 98  case LogSeverity::Fatal:
 99  SimpleLogger<LogSeverity::Fatal>::Get().Enable(true);
100  break;
101  default:
102  ARMNN_ASSERT(false);
103  }
104 }
void Debug(const TensorInfo &inputInfo, const T *inputData, LayerGuid guid, const std::string &layerName, unsigned int slotIndex)
Definition: Debug.cpp:19
#define ARMNN_FALLTHROUGH
Definition: Utils.hpp:37
#define ARMNN_ASSERT(COND)
Definition: Assert.hpp:14

◆ SetLoggingSinks()

void armnn::SetLoggingSinks ( bool  standardOut,
bool  debugOut,
bool  coloured 
)
inline

Definition at line 167 of file Logging.cpp.

References SimpleLogger< Level >::AddSink(), SimpleLogger< Level >::Get(), and SimpleLogger< Level >::RemoveAllSinks().

168 {
169  SimpleLogger<Level>::Get().RemoveAllSinks();
170 
171  if (standardOut)
172  {
173  if (coloured)
174  {
175  SimpleLogger<Level>::Get().AddSink(
176  std::make_shared<StandardOutputColourSink>(Level));
177  } else
178  {
179  SimpleLogger<Level>::Get().AddSink(
180  std::make_shared<StandardOutputSink>());
181  }
182  }
183 
184  if (debugOut)
185  {
186  SimpleLogger<Level>::Get().AddSink(
187  std::make_shared<DebugOutputSink>());
188  }
189 }

◆ SetNeonSliceData()

auto armnn::SetNeonSliceData ( const std::vector< unsigned int > &  m_begin,
const std::vector< unsigned int > &  m_size 
)
inline

Definition at line 113 of file NeonWorkloadUtils.hpp.

References GetOutputTensorData(), and ITensorHandle::Map().

Referenced by NeonSliceWorkload::NeonSliceWorkload().

115 {
116  // This function must translate the size vector given to an end vector
117  // expected by the ACL NESlice workload
118  arm_compute::Coordinates starts;
119  arm_compute::Coordinates ends;
120 
121  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
122 
123  // For strided slices, we have the relationship size = (end - begin) / stride
124  // For slice, we assume stride to be a vector of all ones, yielding the formula
125  // size = (end - begin) therefore we know end = size + begin
126  for (unsigned int i = 0; i < num_dims; i++)
127  {
128  unsigned int revertedIndex = num_dims - i - 1;
129 
130  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
131  ends.set(i, static_cast<int>(m_begin[revertedIndex] + m_size[revertedIndex]));
132  }
133 
134  return std::make_tuple(starts, ends);
135 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates

◆ SetNeonStridedSliceData()

auto armnn::SetNeonStridedSliceData ( const std::vector< int > &  m_begin,
const std::vector< int > &  m_end,
const std::vector< int > &  m_stride 
)
inline

Definition at line 91 of file NeonWorkloadUtils.hpp.

Referenced by NeonStridedSliceWorkload::NeonStridedSliceWorkload().

94 {
 95  arm_compute::Coordinates starts;
 96  arm_compute::Coordinates ends;
 97  arm_compute::Coordinates strides;
 98 
99  unsigned int num_dims = static_cast<unsigned int>(m_begin.size());
100 
101  for (unsigned int i = 0; i < num_dims; i++)
102  {
103  unsigned int revertedIndex = num_dims - i - 1;
104 
105  starts.set(i, static_cast<int>(m_begin[revertedIndex]));
106  ends.set(i, static_cast<int>(m_end[revertedIndex]));
107  strides.set(i, static_cast<int>(m_stride[revertedIndex]));
108  }
109 
110  return std::make_tuple(starts, ends, strides);
111 }
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates

◆ SetValueChecked()

◆ Slice()

void Slice ( const TensorInfo &  inputInfo,
const SliceDescriptor &  descriptor,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 14 of file Slice.cpp.

References ARMNN_ASSERT, TensorShape::GetNumDimensions(), TensorInfo::GetShape(), IgnoreUnused(), SliceDescriptor::m_Begin, and SliceDescriptor::m_Size.

Referenced by TEST_SUITE().

19 {
20  const TensorShape& inputShape = inputInfo.GetShape();
21  const unsigned int numDims = inputShape.GetNumDimensions();
22 
23  ARMNN_ASSERT(descriptor.m_Begin.size() == numDims);
24  ARMNN_ASSERT(descriptor.m_Size.size() == numDims);
25 
26  constexpr unsigned int maxNumDims = 4;
27  ARMNN_ASSERT(numDims <= maxNumDims);
28 
29  std::vector<unsigned int> paddedInput(4);
30  std::vector<unsigned int> paddedBegin(4);
31  std::vector<unsigned int> paddedSize (4);
32 
33  const unsigned int numPaddingDims = maxNumDims - numDims;
34  for (unsigned int i = 0u; i < maxNumDims; ++i)
35  {
36  if (i < numPaddingDims)
37  {
38  paddedInput[i] = 1u;
39  paddedBegin[i] = 0u;
40  paddedSize[i] = 1u;
41  }
42  else
43  {
44  const unsigned int j = i - numPaddingDims;
45  paddedInput[i] = inputShape[j];
46  paddedBegin[i] = descriptor.m_Begin[j];
47  paddedSize[i] = descriptor.m_Size[j];
48  }
49  }
50 
51  unsigned int dim0 = paddedInput[0];
52  unsigned int dim1 = paddedInput[1];
53  unsigned int dim2 = paddedInput[2];
54  unsigned int dim3 = paddedInput[3];
55 
56  unsigned int begin0 = paddedBegin[0];
57  unsigned int begin1 = paddedBegin[1];
58  unsigned int begin2 = paddedBegin[2];
59  unsigned int begin3 = paddedBegin[3];
60 
61  unsigned int size0 = paddedSize[0];
62  unsigned int size1 = paddedSize[1];
63  unsigned int size2 = paddedSize[2];
64  unsigned int size3 = paddedSize[3];
65 
66  ARMNN_ASSERT(begin0 + size0 <= dim0);
67  ARMNN_ASSERT(begin1 + size1 <= dim1);
68  ARMNN_ASSERT(begin2 + size2 <= dim2);
69  ARMNN_ASSERT(begin3 + size3 <= dim3);
70 
71  const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
72  unsigned char* output = reinterpret_cast<unsigned char*>(outputData);
73 
74  IgnoreUnused(dim0);
75  for (unsigned int idx0 = begin0; idx0 < begin0 + size0; ++idx0)
76  {
77  for (unsigned int idx1 = begin1; idx1 < begin1 + size1; ++idx1)
78  {
79  for (unsigned int idx2 = begin2; idx2 < begin2 + size2; ++idx2)
80  {
81  for (unsigned int idx3 = begin3; idx3 < begin3 + size3; ++idx3)
82  {
83  const unsigned int inputOffset =
84  (((idx0 * dim1 + idx1) * dim2 + idx2) * dim3 + idx3) * dataTypeSize;
85 
86  ::memcpy(output, input + inputOffset, dataTypeSize);
87  output += dataTypeSize;
88  }
89  }
90  }
91  }
92 }
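
A minimal usage sketch (Slice is a reference-backend workload helper rather than public API, so its header path is assumed): slicing one row of two elements out of a 1x1x2x3 float tensor.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
// #include "Slice.hpp"   // reference backend workload header, path assumed
#include <vector>

void SliceExample()
{
    using namespace armnn;

    // 1x1x2x3 input laid out as { 1 2 3 / 4 5 6 }.
    const unsigned int inputDims[] = {1, 1, 2, 3};
    TensorInfo inputInfo(4, inputDims, DataType::Float32);

    SliceDescriptor desc;
    desc.m_Begin = {0, 0, 1, 1};   // start at row 1, column 1
    desc.m_Size  = {1, 1, 1, 2};   // take one row of two elements

    std::vector<float> input  = {1, 2, 3, 4, 5, 6};
    std::vector<float> output(2);                    // expected result: {5, 6}

    Slice(inputInfo, desc, input.data(), output.data(), sizeof(float));
}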

◆ Softmax()

void Softmax ( Decoder< float > &  in,
Encoder< float > &  out,
const TensorInfo inputTensorInfo,
float  beta,
int  axis 
)

Computes the softmax function over the inputs, writing the results to the outputs, with the shape given by tensorInfo.

Definition at line 17 of file Softmax.cpp.

References ARMNN_ASSERT_MSG, Decoder< IType >::Get(), TensorShape::GetNumDimensions(), TensorInfo::GetNumDimensions(), armnnUtils::GetNumElementsBetween(), TensorInfo::GetShape(), and Encoder< IType >::Set().

Referenced by TEST_SUITE().

18 {
19  ARMNN_ASSERT_MSG(axis < static_cast<int>(inputTensorInfo.GetNumDimensions()),
20  "Required axis index greater than number of dimensions.");
21  ARMNN_ASSERT_MSG(axis >= -static_cast<int>(inputTensorInfo.GetNumDimensions()),
22  "Required axis index lower than negative of the number of dimensions");
23 
24  unsigned int uAxis = axis < 0 ?
25  inputTensorInfo.GetNumDimensions() - static_cast<unsigned int>(abs(axis))
26  : static_cast<unsigned int>(axis);
27 
28  const TensorShape& inputShape = inputTensorInfo.GetShape();
29  const unsigned int outerSize = armnnUtils::GetNumElementsBetween(inputShape, 0, uAxis);
30  const unsigned int axisSize = inputShape[uAxis];
31  const unsigned int innerSize = armnnUtils::GetNumElementsBetween(inputShape,
32  uAxis + 1,
33  inputShape.GetNumDimensions());
34 
35  for (unsigned int outer = 0; outer < outerSize; ++outer)
36  {
37  unsigned int inputBeginIdx = outer * axisSize * innerSize;
38  unsigned int inputEndIdx = inputBeginIdx + axisSize * innerSize;
39  unsigned int outputBeginIdx = outer * axisSize * innerSize;
40 
41  for (unsigned int inner = 0; inner < innerSize; ++inner, ++inputBeginIdx, ++inputEndIdx, ++outputBeginIdx)
42  {
43  // Find max
44  float maxValue = std::numeric_limits<float>::lowest();
45  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
46  {
47  in[iter];
48  maxValue = std::max(maxValue, in.Get());
49  }
50 
51  // Compute sum
52  float sum = 0.0f;
53  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize)
54  {
55  in[iter];
56  sum += std::exp((in.Get() - maxValue) * beta);
57  }
58 
59  // Compute result
60  unsigned int outputIter = outputBeginIdx;
61  out[outputIter];
62  for (unsigned int iter = inputBeginIdx; iter < inputEndIdx; iter += innerSize, outputIter += innerSize)
63  {
64  out[outputIter];
65  in[iter];
66  out.Set(std::exp((in.Get() - maxValue) * beta) / sum);
67  }
68  }
69  }
70 }
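
The implementation is the numerically stable form: for each slice along the chosen axis, softmax(x)_i = exp((x_i - max(x)) * beta) / sum_j exp((x_j - max(x)) * beta). A standalone sketch of the same arithmetic on a single 3-element slice:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<float> x = {1.0f, 2.0f, 3.0f};
    const float beta = 1.0f;

    // Subtract the maximum before exponentiating, exactly as the loop above does.
    const float maxValue = *std::max_element(x.begin(), x.end());

    float sum = 0.0f;
    for (float v : x) { sum += std::exp((v - maxValue) * beta); }

    for (float v : x)
    {
        std::printf("%f ", std::exp((v - maxValue) * beta) / sum);  // ~0.090 0.245 0.665
    }
    std::printf("\n");
    return 0;
}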

◆ SpaceToBatchNd()

void SpaceToBatchNd ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const SpaceToBatchNdDescriptor params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 34 of file SpaceToBatchNd.cpp.

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToBatchNdDescriptor::m_BlockShape, SpaceToBatchNdDescriptor::m_DataLayout, SpaceToBatchNdDescriptor::m_PadList, Encoder< IType >::Set(), and SpaceToBatchNd().

Referenced by SpaceToBatchNd(), SpaceToBatchNdLayer::SpaceToBatchNdLayer(), and TEST_SUITE().

39 {
40  DataLayoutIndexed dataLayout = params.m_DataLayout;
41 
42  const TensorShape& inputShape = inputInfo.GetShape();
43  const TensorShape& outputShape = outputInfo.GetShape();
44 
45  const unsigned int channels = inputShape[dataLayout.GetChannelsIndex()];
46 
47  const unsigned int inputBatchSize = inputShape[0];
48  const unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
49  const unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
50 
51  const unsigned int outputBatchSize = outputShape[0];
52  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
53  const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
54 
55  const unsigned int blockHeight = params.m_BlockShape[0];
56  const unsigned int blockWidth = params.m_BlockShape[1];
57 
58  const unsigned int paddingTop = params.m_PadList[0].first;
59  const unsigned int paddingLeft = params.m_PadList[1].first;
60 
61  for (unsigned int outB = 0; outB < outputBatchSize; outB++)
62  {
63  unsigned int inB = outB % inputBatchSize;
64 
65  unsigned int shiftW = (outB / inputBatchSize) % blockWidth;
66  unsigned int shiftH = (outB / inputBatchSize) / blockWidth;
67 
68  for (unsigned int outH = 0; outH < outputHeight; outH++)
69  {
70  for (unsigned int outW = 0; outW < outputWidth; outW++)
71  {
72  if (outH * blockHeight + shiftH < paddingTop ||
73  outH * blockHeight + shiftH >= paddingTop + inputHeight ||
74  outW * blockWidth + shiftW < paddingLeft ||
75  outW * blockWidth + shiftW >= paddingLeft + inputWidth)
76  {
77  for (unsigned int c = 0; c < channels; c++)
78  {
79  unsigned int outOffset = GetOffset(outputShape,
80  outB,
81  outH,
82  outW,
83  c,
84  dataLayout);
85  outputData += outOffset;
86  outputData.Set(0);
87  outputData -= outOffset;
88  }
89  }
90  else
91  {
92  for (unsigned int c = 0; c < channels; c++)
93  {
94  unsigned int inOffset = GetOffset(inputShape,
95  inB,
96  (outH * blockHeight + shiftH) - paddingTop,
97  (outW * blockWidth + shiftW) - paddingLeft,
98  c,
99  dataLayout);
100 
101  unsigned int outOffset = GetOffset(outputShape,
102  outB,
103  outH,
104  outW,
105  c,
106  dataLayout);
107 
108  outputData += outOffset;
109  inputData += inOffset;
110  outputData.Set(inputData.Get());
111  inputData -= inOffset;
112  outputData -= outOffset;
113  }
114  }
115  }
116  }
117  }
118 }
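
A minimal descriptor sketch to make the shape effect concrete: an NHWC input of shape [1, 4, 4, 1] with a 2x2 block and no padding produces an output of shape [4, 2, 2, 1], since the batch grows by blockHeight * blockWidth while each spatial dimension shrinks by its block size.

#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>

armnn::SpaceToBatchNdDescriptor MakeSpaceToBatchNdDescriptor()
{
    armnn::SpaceToBatchNdDescriptor desc;
    desc.m_BlockShape = {2, 2};              // {blockHeight, blockWidth}
    desc.m_PadList    = {{0, 0}, {0, 0}};    // heightPad{top, bottom}, widthPad{left, right}
    desc.m_DataLayout = armnn::DataLayout::NHWC;
    return desc;
}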

◆ SpaceToDepth()

void SpaceToDepth ( const TensorInfo inputInfo,
const TensorInfo outputInfo,
const SpaceToDepthDescriptor params,
Decoder< float > &  inputData,
Encoder< float > &  outputData 
)

Definition at line 36 of file SpaceToDepth.cpp.

References Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), GetOffset(), TensorInfo::GetShape(), DataLayoutIndexed::GetWidthIndex(), SpaceToDepthDescriptor::m_BlockSize, SpaceToDepthDescriptor::m_DataLayout, Encoder< IType >::Set(), and SpaceToDepth().

Referenced by SpaceToDepth(), SpaceToDepthLayer::SpaceToDepthLayer(), and TEST_SUITE().

41 {
42  DataLayoutIndexed dataLayout = params.m_DataLayout;
43 
44  const TensorShape& inputShape = inputInfo.GetShape();
45  const TensorShape& outputShape = outputInfo.GetShape();
46 
47  const unsigned int inputBatchSize = inputShape[0];
48  const unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
49 
50  const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
51  const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
52  const unsigned int outputChannels = outputShape[dataLayout.GetChannelsIndex()];
53 
54  const unsigned int blockSize = params.m_BlockSize;
55 
56  if (blockSize == 0)
57  {
59  "Input shape must be divisible by block size in all spatial dimensions: Block size is"
60  " equal to zero");
61  }
62 
63  for (unsigned int outChannelIndex = 0; outChannelIndex < outputChannels; outChannelIndex++)
64  {
65  unsigned int inChannelIndex = outChannelIndex % inputChannels;
66 
67  unsigned int shiftW = (outChannelIndex / inputChannels) % blockSize;
68  unsigned int shiftH = (outChannelIndex / inputChannels) / blockSize;
69 
70  for (unsigned int outH = 0; outH < outputHeight; outH++)
71  {
72  for (unsigned int outW = 0; outW < outputWidth; outW++)
73  {
74  for (unsigned int inBatchIndex = 0; inBatchIndex < inputBatchSize; inBatchIndex++)
75  {
76  unsigned int inOffset = GetOffset(inputShape,
77  inChannelIndex,
78  (outH * blockSize + shiftH),
79  (outW * blockSize + shiftW),
80  inBatchIndex,
81  dataLayout);
82 
83  unsigned int outOffset = GetOffset(outputShape,
84  outChannelIndex,
85  outH,
86  outW,
87  inBatchIndex,
88  dataLayout);
89 
90  outputData += outOffset;
91  inputData += inOffset;
92  outputData.Set(inputData.Get());
93  inputData -= inOffset;
94  outputData -= outOffset;
95  }
96  }
97  }
98  }
99 }
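
A matching descriptor sketch: an NHWC input of shape [1, 4, 4, 1] with block size 2 yields an output of shape [1, 2, 2, 4], since each spatial dimension shrinks by the block size while the channel count grows by blockSize * blockSize.

#include <armnn/Descriptors.hpp>
#include <armnn/Types.hpp>

armnn::SpaceToDepthDescriptor MakeSpaceToDepthDescriptor()
{
    armnn::SpaceToDepthDescriptor desc;
    desc.m_BlockSize  = 2;
    desc.m_DataLayout = armnn::DataLayout::NHWC;
    return desc;
}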

◆ Split()

void Split ( const SplitterQueueDescriptor data,
std::vector< ITensorHandle *>  inputs,
std::vector< ITensorHandle *>  outputs 
)

Definition at line 21 of file Splitter.cpp.

References ARMNN_ASSERT, Encoder< IType >::Get(), TensorInfo::GetNumDimensions(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, and MaxNumOfTensorDimensions.

Referenced by RefSplitterWorkload::ExecuteAsync(), and Splitter().

24 {
25  const TensorInfo& inputInfo = GetTensorInfo(inputs[0]);
26 
27  std::unique_ptr<Decoder<float>> decoderPtr =
28  MakeDecoder<float>(inputInfo, inputs[0]->Map());
29  Decoder<float>& decoder = *decoderPtr;
30 
31  for (unsigned int index = 0; index < inputInfo.GetNumElements(); ++index)
32  {
33  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
34 
35  unsigned int indexRemainder = index;
36  unsigned int dimensionStride = inputInfo.GetNumElements();
37 
38  for (unsigned int i = 0; i<inputInfo.GetNumDimensions(); i++)
39  {
40  dimensionStride /= inputInfo.GetShape()[i];
41  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
42  indexRemainder -= indices[i] * dimensionStride;
43  }
44 
45  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
46  {
47  SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
48 
49  //Split view extents are defined by the size of (the corresponding) input tensor.
50  const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
51  ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo.GetNumDimensions());
52 
53  // Check all dimensions to see if this element is inside the given input view.
54  bool insideView = true;
55  for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
56  {
57  if (indices[i] < view.m_Origin[i])
58  {
59  insideView = false;
60  }
61  if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
62  {
63  insideView = false;
64  }
65  }
66 
67  if (insideView)
68  {
69  std::unique_ptr<Encoder<float>> encoderPtr =
70  MakeEncoder<float>(outputInfo, outputs[viewIdx]->Map());
71  Encoder<float>& encoder = *encoderPtr;
72 
73  unsigned int outIndex = 0;
74  unsigned int dimensionStride = 1;
75  float inputValue = 0.f;
76 
77  for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
78  {
79  outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
80  dimensionStride *= outputInfo.GetShape()[i];
81  }
82 
83  decoder += index;
84  inputValue = decoder.Get();
85  decoder -= index;
86 
87  encoder += outIndex;
88  encoder.Set(inputValue);
89  break;
90  }
91  }
92  }
93 }
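
Each output view is described only by its origin inside the input; its extent comes from the corresponding output tensor's shape. A hedged sketch (header path and the ViewOrigin vector constructor assumed) of the view origins for splitting a [4, 2] tensor into two [2, 2] outputs along axis 0:

#include <armnn/backends/WorkloadData.hpp>   // path assumed

void DescribeSplit(armnn::SplitterQueueDescriptor& data)
{
    // First view starts at row 0, second at row 2; both span all columns.
    data.m_ViewOrigins.push_back(armnn::SplitterQueueDescriptor::ViewOrigin({0, 0}));
    data.m_ViewOrigins.push_back(armnn::SplitterQueueDescriptor::ViewOrigin({2, 0}));
}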

◆ Splitter()

void armnn::Splitter ( const SplitterQueueDescriptor data,
std::vector< ITensorHandle *>  inputs,
std::vector< ITensorHandle *>  outputs 
)

Definition at line 17 of file Splitter.hpp.

References ARMNN_ASSERT, TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), GetTensorInfo(), SplitterQueueDescriptor::ViewOrigin::m_Origin, SplitterQueueDescriptor::m_ViewOrigins, MaxNumOfTensorDimensions, and Split().

Referenced by TEST_SUITE().

20 {
21  const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
22 
23  for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
24  {
25  unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
26 
27  unsigned int indexRemainder = index;
28  unsigned int dimensionStride = inputInfo0.GetNumElements();
29 
30  for (unsigned int i = 0; i<inputInfo0.GetNumDimensions(); i++)
31  {
32  dimensionStride /= inputInfo0.GetShape()[i];
33  indices[i] = indexRemainder / dimensionStride; // Use integer division to round down.
34  indexRemainder -= indices[i] * dimensionStride;
35  }
36 
37  for (unsigned int viewIdx = 0; viewIdx < data.m_ViewOrigins.size(); ++viewIdx)
38  {
39  SplitterQueueDescriptor::ViewOrigin const& view = data.m_ViewOrigins[viewIdx];
40 
41  //Split view extents are defined by the size of (the corresponding) input tensor.
42  const TensorInfo& outputInfo = GetTensorInfo(outputs[viewIdx]);
43  ARMNN_ASSERT(outputInfo.GetNumDimensions() == inputInfo0.GetNumDimensions());
44 
45  // Check all dimensions to see if this element is inside the given input view.
46  bool insideView = true;
47  for (unsigned int i = 0; i<outputInfo.GetNumDimensions(); i++)
48  {
49  if (indices[i] < view.m_Origin[i])
50  {
51  insideView = false;
52  }
53  if (indices[i] >= view.m_Origin[i] + outputInfo.GetShape()[i])
54  {
55  insideView = false;
56  }
57  }
58 
59  if (insideView)
60  {
61  unsigned int outIndex = 0;
62  unsigned int dimensionStride = 1;
63 
64  for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;)
65  {
66  outIndex += dimensionStride * (indices[i] - view.m_Origin[i]);
67  dimensionStride *= outputInfo.GetShape()[i];
68  }
69 
70  //We are within the view, to copy input data to the output corresponding to this view.
71  DataType* outputData = GetOutputTensorData<DataType>(viewIdx, data);
72  ARMNN_ASSERT(outputData);
73 
74  const DataType* inputData = GetInputTensorData<DataType>(0, data);
75  ARMNN_ASSERT(inputData);
76 
77  outputData[outIndex] = inputData[index];
78  }
79  }
80  }
81 }

◆ Stack()

void Stack ( const StackQueueDescriptor data,
std::vector< std::unique_ptr< Decoder< float >>> &  inputs,
Encoder< float > &  output,
const TensorInfo inputInfo,
const TensorInfo outputInfo 
)

Definition at line 12 of file Stack.cpp.

References TensorInfo::GetNumDimensions(), TensorInfo::GetNumElements(), TensorInfo::GetShape(), StackDescriptor::m_Axis, QueueDescriptor::m_Inputs, StackDescriptor::m_NumInputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, and Encoder< IType >::Set().

Referenced by TEST_SUITE().

17 {
18  unsigned int outputNumDims = outputInfo.GetNumDimensions();
19  unsigned int inputNumDims = inputInfo.GetNumDimensions();
20 
21  const armnn::TensorShape& outputDims = outputInfo.GetShape();
22  const armnn::TensorShape& inputDims = inputInfo.GetShape();
23 
24  unsigned int axis = data.m_Parameters.m_Axis;
25 
26  // Can perform a simple concatenation when axis == 0
27  if (!axis)
28  {
29  unsigned int numInputs = data.m_Parameters.m_NumInputs;
30  unsigned int inputLength = inputInfo.GetNumElements();
31 
32  for (unsigned int inputIdx=0; inputIdx<numInputs; ++inputIdx)
33  {
34  for (unsigned int elmt=0; elmt<inputLength; ++elmt)
35  {
36  (*inputs[inputIdx])[elmt];
37  output[(inputIdx * inputLength) + elmt];
38  output.Set(inputs[inputIdx]->Get());
39  }
40  }
41  return;
42  }
43 
44  const unsigned int iNumTensors = static_cast<unsigned int>(data.m_Inputs.size());
45  const unsigned int iBatchSize = inputDims[0];
46  const unsigned int iChannels = (inputNumDims > 1) ? inputDims[1] : 1;
47  const unsigned int iHeight = (inputNumDims > 2) ? inputDims[2] : 1;
48  const unsigned int iWidth = (inputNumDims > 3) ? inputDims[3] : 1;
49 
50  const unsigned int oBatchSize = outputDims[1];
51  const unsigned int oChannels = (outputNumDims > 2) ? outputDims[2] : 1;
52  const unsigned int oHeight = (outputNumDims > 3) ? outputDims[3] : 1;
53  const unsigned int oWidth = (outputNumDims > 4) ? outputDims[4] : 1;
54 
55  // Array to store the input coordinates
56  // iCoordinates[0] = i, iCoordinates[1] = bi, iCoordinates[2] = ci
57  // iCoordinates[3] = hi, iCoordinates[4] = wi, iCoordinates[5] = 0
58  // iCoordinates[5] will be always zero and used for not incrementing
59  // the output when the input has less than 4 dimensions
60  std::array<unsigned int, 6> iCoordinates{ 0 };
61 
62  // Array of pointers used to map the output coordinates to the input ones, in accordance with the axis
63  // This array is initialized with &iCoordinates[5] since this will be always zero
64  std::array<unsigned int *, 5> oCoordinates = { &iCoordinates[5],
65  &iCoordinates[5],
66  &iCoordinates[5],
67  &iCoordinates[5],
68  &iCoordinates[5] };
69 
70  // Set the axis coordinate
71  oCoordinates[axis] = &iCoordinates[0];
72 
73  // Map the output coordinates, accounting for the axis
74  unsigned int dim_shift = 0;
75  for(unsigned int dim = 0; dim < inputNumDims; ++dim)
76  {
77  if(dim == axis)
78  {
79  dim_shift++;
80  }
81  oCoordinates[dim + dim_shift] = &iCoordinates[dim + 1];
82  }
83 
84  // Alias for the input coordinates
85  unsigned int &i = iCoordinates[0];
86  unsigned int &bi = iCoordinates[1];
87  unsigned int &ci = iCoordinates[2];
88  unsigned int &hi = iCoordinates[3];
89  unsigned int &wi = iCoordinates[4];
90 
91  // Alias for the output coordinates
92  unsigned int &o = *(oCoordinates[0]);
93  unsigned int &bo = *(oCoordinates[1]);
94  unsigned int &co = *(oCoordinates[2]);
95  unsigned int &ho = *(oCoordinates[3]);
96  unsigned int &wo = *(oCoordinates[4]);
97 
98  // Stack tensors
99  for(; i < iNumTensors; ++(i))
100  {
101  for(bi = 0; bi < iBatchSize; ++(bi))
102  {
103  for(ci = 0; ci < iChannels; ++(ci))
104  {
105  for(hi = 0; hi < iHeight; ++(hi))
106  {
107  for(wi = 0; wi < iWidth; ++(wi))
108  {
109  output[o * oWidth * oHeight * oChannels * oBatchSize +
110  bo * oWidth * oHeight * oChannels +
111  co * oWidth * oHeight +
112  ho * oWidth +
113  wo];
114 
115  output.Set(inputs[i]->Get());
116 
117  ++(*(inputs[i]));
118  }
119  }
120  }
121  }
122  }
123 }
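
A minimal descriptor sketch: stacking three [2, 2] inputs along a new leading axis gives a [3, 2, 2] output, and with m_Axis == 0 the code above falls back to the simple concatenation path.

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>

armnn::StackDescriptor MakeStackDescriptor()
{
    armnn::StackDescriptor desc;
    desc.m_Axis       = 0;
    desc.m_NumInputs  = 3;
    desc.m_InputShape = armnn::TensorShape({2, 2});   // shape of each individual input
    return desc;
}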

◆ StrEqual()

constexpr bool armnn::StrEqual ( const char *  strA,
const char(&)  strB[N] 
)

Definition at line 170 of file TypesUtils.hpp.

Referenced by ParseComputeDevice().

171 {
172  bool isEqual = true;
173  for (unsigned i = 0; isEqual && (i < N); ++i)
174  {
175  isEqual = (strA[i] == strB[i]);
176  }
177  return isEqual;
178 }
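
Because the function is constexpr, it can be evaluated at compile time. Note that N, the literal's length including the terminating '\0', bounds the loop, so the strings compare equal only when strA terminates at the same position as strB. A small sketch:

#include <armnn/TypesUtils.hpp>

static_assert(armnn::StrEqual("CpuRef", "CpuRef"), "identical strings compare equal");
static_assert(!armnn::StrEqual("CpuAcc", "GpuAcc"), "different strings compare unequal");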

◆ StridedSlice()

void StridedSlice ( const TensorInfo inputInfo,
const StridedSliceDescriptor params,
const void *  inputData,
void *  outputData,
unsigned int  dataTypeSize 
)

Definition at line 90 of file StridedSlice.cpp.

References TensorInfo::GetShape(), and numeric_cast().

Referenced by TEST_SUITE().

95 {
96  const unsigned char* input = reinterpret_cast<const unsigned char*>(inputData);
97  unsigned char* output = reinterpret_cast<unsigned char*>(outputData);
98 
99  const TensorShape inputShape = ExtendShape(inputInfo.GetShape(), 4);
100 
101  StridedSliceDescriptor paddedParams = params;
102 
103  // Pad parameters to 4 dimensions
104  PadParams(paddedParams, 4);
105 
106  const int start0 = paddedParams.GetStartForAxis(inputShape, 0);
107  const int stop0 = paddedParams.GetStopForAxis (inputShape, 0, start0);
108 
109  const int start1 = paddedParams.GetStartForAxis(inputShape, 1);
110  const int stop1 = paddedParams.GetStopForAxis (inputShape, 1, start1);
111 
112  const int start2 = paddedParams.GetStartForAxis(inputShape, 2);
113  const int stop2 = paddedParams.GetStopForAxis (inputShape, 2, start2);
114 
115  const int start3 = paddedParams.GetStartForAxis(inputShape, 3);
116  const int stop3 = paddedParams.GetStopForAxis (inputShape, 3, start3);
117 
118  const int step = armnn::numeric_cast<int>(dataTypeSize);
119 
120  for (int in0 = start0;
121  !LoopCondition(in0, stop0, paddedParams.m_Stride[0]);
122  in0 += paddedParams.m_Stride[0])
123  {
124  for (int in1 = start1;
125  !LoopCondition(in1, stop1, paddedParams.m_Stride[1]);
126  in1 += paddedParams.m_Stride[1])
127  {
128  for (int in2 = start2;
129  !LoopCondition(in2, stop2, paddedParams.m_Stride[2]);
130  in2 += paddedParams.m_Stride[2])
131  {
132  for (int in3 = start3;
133  !LoopCondition(in3, stop3, paddedParams.m_Stride[3]);
134  in3 += paddedParams.m_Stride[3])
135  {
136  int dim1 = armnn::numeric_cast<int>(inputShape[1]);
137  int dim2 = armnn::numeric_cast<int>(inputShape[2]);
138  int dim3 = armnn::numeric_cast<int>(inputShape[3]);
139 
140  int inputOffset = (((in0 * dim1 + in1) * dim2 + in2) * dim3 + in3) * step;
141  ::memcpy(output, input + inputOffset, dataTypeSize);
142  output += step;
143  }
144  }
145  }
146  }
147 }
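
A minimal descriptor sketch (mask fields left at their defaults): selecting every second row of a [4, 4] tensor, i.e. rows 0 and 2 with all columns.

#include <armnn/Descriptors.hpp>

armnn::StridedSliceDescriptor MakeStridedSliceDescriptor()
{
    armnn::StridedSliceDescriptor desc;
    desc.m_Begin  = {0, 0};
    desc.m_End    = {4, 4};
    desc.m_Stride = {2, 1};
    return desc;
}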

◆ StringToLogLevel()

LogSeverity armnn::StringToLogLevel ( std::string  level)
inline

Definition at line 36 of file Logging.hpp.

References Debug, Error, Fatal, Info, Trace, and Warning.

Referenced by DelegateOptions::SetLoggingSeverity().

37 {
38  // Transfer to lower case
39  std::transform(level.begin(), level.end(), level.begin(),
40  [](unsigned char c){ return std::tolower(c); }
41  );
42 
43  if (level == "trace")
44  {
45  return LogSeverity::Trace;
46  }
47  else if (level == "debug")
48  {
49  return LogSeverity::Debug;
50  }
51  else if (level == "info")
52  {
53  return LogSeverity::Info;
54  }
55  else if (level == "warning")
56  {
57  return LogSeverity::Warning;
58  }
59  else if (level == "error")
60  {
61  return LogSeverity::Error;
62  }
63  else if (level == "fatal")
64  {
65  return LogSeverity::Fatal;
66  }
67  else
68  {
69  throw armnn::Exception("Unknown severity level for logging: '" + level +
70  "'. Valid options: trace, debug, info, warning, error, fatal");
71  }
72 }
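
A minimal usage sketch (include paths assumed), pairing the parser with the public logging setup:

#include <armnn/Logging.hpp>   // path assumed for StringToLogLevel
#include <armnn/Utils.hpp>     // ConfigureLogging, LogSeverity
#include <string>

void ConfigureFromString(const std::string& level)
{
    // Throws armnn::Exception for anything other than
    // trace/debug/info/warning/error/fatal (case-insensitive).
    armnn::LogSeverity severity = armnn::StringToLogLevel(level);
    armnn::ConfigureLogging(/*printToStandardOutput=*/true,
                            /*printToDebugOutput=*/false,
                            severity);
}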

◆ swap() [1/2]

void armnn::swap ( OriginsDescriptor first,
OriginsDescriptor second 
)

Definition at line 350 of file Descriptors.cpp.

References ViewsDescriptor::swap, and swap().

Referenced by FullyConnectedFloat32Test(), FullyConnectedLargeTestCommon(), BackendId::operator=(), SquashEqualSiblingsImpl< Comparable >::Run(), BackendRegistry::Swap(), and TEST_SUITE().

351 {
352  using std::swap;
353  swap(first.m_NumViews, second.m_NumViews);
354  swap(first.m_NumDimensions, second.m_NumDimensions);
355  swap(first.m_ViewOrigins, second.m_ViewOrigins);
356  swap(first.m_ConcatAxis, second.m_ConcatAxis);
357 }

◆ swap() [2/2]

void armnn::swap ( ViewsDescriptor first,
ViewsDescriptor second 
)

Definition at line 359 of file Descriptors.cpp.

References ViewsDescriptor::swap.

Referenced by swap().

360 {
361  using std::swap;
362  swap(first.m_Origins, second.m_Origins);
363  swap(first.m_ViewSizes, second.m_ViewSizes);
364 }

◆ TEST_SUITE() [1/3]

armnn::TEST_SUITE ( "TestInputOutputLayerVisitor"  )

Definition at line 13 of file TestInputOutputLayerVisitor.cpp.

References NetworkImpl::AddInputLayer(), NetworkImpl::AddOutputLayer(), and IConnectableLayer::ExecuteStrategy().

14 {
15 TEST_CASE("CheckInputLayerVisitorBindingIdAndName")
16 {
17  const char* layerName = "InputLayer";
18  TestInputLayerVisitor visitor(1, layerName);
19  NetworkImpl net;
20 
21  IConnectableLayer *const layer = net.AddInputLayer(1, layerName);
22  layer->ExecuteStrategy(visitor);
23 }
24 
25 TEST_CASE("CheckInputLayerVisitorBindingIdAndNameNull")
26 {
27  TestInputLayerVisitor visitor(1);
28  NetworkImpl net;
29 
30  IConnectableLayer *const layer = net.AddInputLayer(1);
31  layer->ExecuteStrategy(visitor);
32 }
33 
34 TEST_CASE("CheckOutputLayerVisitorBindingIdAndName")
35 {
36  const char* layerName = "OutputLayer";
37  TestOutputLayerVisitor visitor(1, layerName);
38  NetworkImpl net;
39 
40  IConnectableLayer *const layer = net.AddOutputLayer(1, layerName);
41  layer->ExecuteStrategy(visitor);
42 }
43 
44 TEST_CASE("CheckOutputLayerVisitorBindingIdAndNameNull")
45 {
46  TestOutputLayerVisitor visitor(1);
47  NetworkImpl net;
48 
49  IConnectableLayer *const layer = net.AddOutputLayer(1);
50  layer->ExecuteStrategy(visitor);
51 }
52 
53 }

◆ TEST_SUITE() [2/3]

armnn::TEST_SUITE ( "MemoryManagerTests"  )

Unit test Storing, Allocating and Deallocating with a custom allocator.

Definition at line 53 of file MemoryManagerTests.cpp.

References MemoryManager::Allocate(), MemoryManager::Deallocate(), and MemoryManager::StoreMemToAllocate().

54 {
55 /// Unit test Storing, Allocating and Deallocating with a custom allocator.
56 TEST_CASE("MemoryManagerTest")
57 {
58  using namespace armnn;
59 
60  // Create mock up bufferStorageVector with 2 BufferStorage with the same TensorMemory
61  size_t numTensors = 5;
62  std::vector<std::shared_ptr<TensorMemory>> tensorMemoryPointerVector(numTensors);
63  std::vector<std::shared_ptr<TensorMemory>> tensorMemoryVector;
64  tensorMemoryVector.reserve(numTensors);
65 
66  std::vector<size_t> offsets(numTensors);
67  std::iota(std::begin(offsets), std::end(offsets), 0);
68 
69  for (uint32_t idx = 0; idx < tensorMemoryPointerVector.size(); ++idx)
70  {
71  tensorMemoryVector.emplace_back(std::make_shared<TensorMemory>(TensorMemory{offsets[idx], 0, nullptr}));
72 
73  tensorMemoryPointerVector[idx] = tensorMemoryVector[idx];
74  }
75 
76  std::vector<BufferStorage> bufferStorageVector;
77  bufferStorageVector.emplace_back(BufferStorage{tensorMemoryPointerVector, numTensors});
78  bufferStorageVector.emplace_back(BufferStorage{tensorMemoryPointerVector, numTensors});
79 
80  // Create an instance of the SampleCustomAllocator
81  std::shared_ptr<SampleCustomAllocator> customAllocator =
82  std::make_unique<SampleCustomAllocator>(SampleCustomAllocator());
83 
84  customAllocator->m_Values = {10, 11, 12, 13, 14};
85  // Check that the test was set up correctly
86  CHECK(customAllocator->m_Values.size() == numTensors);
87 
88  size_t bufferVecSize = bufferStorageVector.size();
89  // Utilise 3 functions in the MemoryManager. Check the counters and the pointer to the values are correct.
90  MemoryManager memoryManager;
91  memoryManager.StoreMemToAllocate(bufferStorageVector, customAllocator);
92 
93  memoryManager.Allocate();
94  CHECK(customAllocator->m_CounterAllocate == bufferVecSize);
95 
96  uint32_t idx = 0;
97  for (auto tensorMemory : tensorMemoryVector)
98  {
99  auto value = reinterpret_cast<uint8_t *>(tensorMemory->m_Data);
100  CHECK(customAllocator->m_Values[idx] == *value);
101  idx += 1;
102  }
103 
104  memoryManager.Deallocate();
105  CHECK(customAllocator->m_CounterFree == bufferStorageVector.size());
106 }
107 }

◆ TEST_SUITE() [3/3]

armnn::TEST_SUITE ( "TestConstTensorLayerVisitor"  )

Definition at line 110 of file ConstTensorLayerVisitor.cpp.

References NetworkImpl::AddBatchNormalizationLayer(), NetworkImpl::AddConstantLayer(), NetworkImpl::AddConvolution2dLayer(), NetworkImpl::AddDepthwiseConvolution2dLayer(), NetworkImpl::AddFullyConnectedLayer(), NetworkImpl::AddLstmLayer(), NetworkImpl::AddQLstmLayer(), NetworkImpl::AddQuantizedLstmLayer(), IOutputSlot::Connect(), IConnectableLayer::ExecuteStrategy(), Float32, IConnectableLayer::GetInputSlot(), IConnectableLayer::GetOutputSlot(), LstmDescriptor::m_ActivationFunc, FullyConnectedDescriptor::m_BiasEnabled, Convolution2dDescriptor::m_BiasEnabled, DepthwiseConvolution2dDescriptor::m_BiasEnabled, QuantizedLstmInputParams::m_CellBias, LstmInputParams::m_CellBias, QLstmDescriptor::m_CellClip, LstmInputParams::m_CellLayerNormWeights, LstmInputParams::m_CellToForgetWeights, LstmInputParams::m_CellToInputWeights, LstmInputParams::m_CellToOutputWeights, LstmDescriptor::m_CifgEnabled, QLstmDescriptor::m_CifgEnabled, LstmDescriptor::m_ClippingThresCell, LstmDescriptor::m_ClippingThresProj, FullyConnectedDescriptor::m_ConstantWeights, Convolution2dDescriptor::m_DataLayout, DepthwiseConvolution2dDescriptor::m_DataLayout, BatchNormalizationDescriptor::m_DataLayout, BatchNormalizationDescriptor::m_Eps, QuantizedLstmInputParams::m_ForgetGateBias, LstmInputParams::m_ForgetGateBias, LstmInputParams::m_ForgetLayerNormWeights, QuantizedLstmInputParams::m_InputGateBias, LstmInputParams::m_InputGateBias, LstmInputParams::m_InputLayerNormWeights, QuantizedLstmInputParams::m_InputToCellWeights, LstmInputParams::m_InputToCellWeights, QuantizedLstmInputParams::m_InputToForgetWeights, LstmInputParams::m_InputToForgetWeights, QuantizedLstmInputParams::m_InputToInputWeights, LstmInputParams::m_InputToInputWeights, QuantizedLstmInputParams::m_InputToOutputWeights, LstmInputParams::m_InputToOutputWeights, QLstmDescriptor::m_LayerNormEnabled, QuantizedLstmInputParams::m_OutputGateBias, LstmInputParams::m_OutputGateBias, LstmInputParams::m_OutputLayerNormWeights, Convolution2dDescriptor::m_PadBottom, DepthwiseConvolution2dDescriptor::m_PadBottom, Convolution2dDescriptor::m_PadLeft, DepthwiseConvolution2dDescriptor::m_PadLeft, Convolution2dDescriptor::m_PadRight, DepthwiseConvolution2dDescriptor::m_PadRight, Convolution2dDescriptor::m_PadTop, DepthwiseConvolution2dDescriptor::m_PadTop, LstmDescriptor::m_PeepholeEnabled, QLstmDescriptor::m_PeepholeEnabled, LstmInputParams::m_ProjectionBias, QLstmDescriptor::m_ProjectionClip, LstmDescriptor::m_ProjectionEnabled, QLstmDescriptor::m_ProjectionEnabled, LstmInputParams::m_ProjectionWeights, QuantizedLstmInputParams::m_RecurrentToCellWeights, LstmInputParams::m_RecurrentToCellWeights, QuantizedLstmInputParams::m_RecurrentToForgetWeights, LstmInputParams::m_RecurrentToForgetWeights, QuantizedLstmInputParams::m_RecurrentToInputWeights, LstmInputParams::m_RecurrentToInputWeights, QuantizedLstmInputParams::m_RecurrentToOutputWeights, LstmInputParams::m_RecurrentToOutputWeights, Convolution2dDescriptor::m_StrideX, DepthwiseConvolution2dDescriptor::m_StrideX, Convolution2dDescriptor::m_StrideY, DepthwiseConvolution2dDescriptor::m_StrideY, FullyConnectedDescriptor::m_TransposeWeightMatrix, NHWC, QAsymmU8, QSymmS16, QSymmS8, and Signed32.

Referenced by TEST_SUITE().

111 {
112 TEST_CASE("CheckConvolution2dLayer")
113 {
114  Convolution2dDescriptor descriptor;
115  descriptor.m_PadLeft = 2;
116  descriptor.m_PadRight = 3;
117  descriptor.m_PadBottom = 1;
118  descriptor.m_PadTop = 5;
119  descriptor.m_StrideX = 2;
120  descriptor.m_StrideY = 3;
121  descriptor.m_DataLayout = DataLayout::NHWC;
122  descriptor.m_BiasEnabled = false;
123 
124  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
125  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
126  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
127 
128  TestConstantLayerVisitor weightsVisitor(weights);
129  TestConvolution2dLayerVisitor visitor(descriptor);
130 
131  NetworkImpl net;
132 
133  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
134  IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor);
135  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
136 
137  weightsLayer->ExecuteStrategy(weightsVisitor);
138  layer->ExecuteStrategy(visitor);
139 }
140 
141 TEST_CASE("CheckNamedConvolution2dLayer")
142 {
143  const char* layerName = "Convolution2dLayer";
144  Convolution2dDescriptor descriptor;
145  descriptor.m_PadLeft = 2;
146  descriptor.m_PadRight = 3;
147  descriptor.m_PadBottom = 1;
148  descriptor.m_PadTop = 5;
149  descriptor.m_StrideX = 2;
150  descriptor.m_StrideY = 3;
151  descriptor.m_DataLayout = DataLayout::NHWC;
152 
153  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
154  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
155  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
156 
157  TestConstantLayerVisitor weightsVisitor(weights);
158  TestConvolution2dLayerVisitor visitor(descriptor, layerName);
159 
160  NetworkImpl net;
161 
162  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
163  IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, layerName);
164 
165  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
166 
167  weightsLayer->ExecuteStrategy(weightsVisitor);
168  layer->ExecuteStrategy(visitor);
169 }
170 
171 TEST_CASE("CheckConvolution2dLayerWithBiases")
172 {
173  Convolution2dDescriptor descriptor;
174  descriptor.m_PadLeft = 2;
175  descriptor.m_PadRight = 3;
176  descriptor.m_PadBottom = 1;
177  descriptor.m_PadTop = 5;
178  descriptor.m_StrideX = 2;
179  descriptor.m_StrideY = 3;
180  descriptor.m_DataLayout = DataLayout::NHWC;
181  descriptor.m_BiasEnabled = true;
182 
183  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
184  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
185  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
186 
187  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
188  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
189  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
190 
191  TestConstantLayerVisitor weightsVisitor(weights);
192  TestConstantLayerVisitor biasVisitor(biases);
193  TestConvolution2dLayerVisitor visitor(descriptor);
194 
195  NetworkImpl net;
196  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
197  IConnectableLayer* const biasLayer = net.AddConstantLayer(biases);
198  IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor);
199 
200  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
201  biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
202 
203  biasLayer->ExecuteStrategy(biasVisitor);
204  weightsLayer->ExecuteStrategy(weightsVisitor);
205  layer->ExecuteStrategy(visitor);
206 }
207 
208 TEST_CASE("CheckNamedConvolution2dLayerWithBiases")
209 {
210  const char* layerName = "Convolution2dLayer";
211  Convolution2dDescriptor descriptor;
212  descriptor.m_PadLeft = 2;
213  descriptor.m_PadRight = 3;
214  descriptor.m_PadBottom = 1;
215  descriptor.m_PadTop = 5;
216  descriptor.m_StrideX = 2;
217  descriptor.m_StrideY = 3;
218  descriptor.m_DataLayout = DataLayout::NHWC;
219  descriptor.m_BiasEnabled = true;
220 
221  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
222  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
223  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
224 
225  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
226  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
227  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
228 
229  TestConstantLayerVisitor weightsVisitor(weights);
230  TestConstantLayerVisitor biasVisitor(biases);
231  TestConvolution2dLayerVisitor visitor(descriptor, layerName);
232 
233  NetworkImpl net;
234  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
235  IConnectableLayer* const biasLayer = net.AddConstantLayer(biases);
236  IConnectableLayer* const layer = net.AddConvolution2dLayer(descriptor, layerName);
237 
238  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
239  biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
240 
241  biasLayer->ExecuteStrategy(biasVisitor);
242  weightsLayer->ExecuteStrategy(weightsVisitor);
243  layer->ExecuteStrategy(visitor);
244 }
245 
246 TEST_CASE("CheckDepthwiseConvolution2dLayer")
247 {
248  DepthwiseConvolution2dDescriptor descriptor;
249  descriptor.m_PadLeft = 2;
250  descriptor.m_PadRight = 3;
251  descriptor.m_PadBottom = 1;
252  descriptor.m_PadTop = 5;
253  descriptor.m_StrideX = 2;
254  descriptor.m_StrideY = 3;
255  descriptor.m_DataLayout = DataLayout::NHWC;
256 
257  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
258  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
259  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
260 
261  NetworkImpl net;
262 
263  TestConstantLayerVisitor weightsVisitor(weights);
264  TestDepthwiseConvolution2dLayerVisitor visitor(descriptor);
265 
266  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
267  IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor);
268  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
269 
270  weightsLayer->ExecuteStrategy(weightsVisitor);
271  layer->ExecuteStrategy(visitor);
272 }
273 
274 TEST_CASE("CheckNamedDepthwiseConvolution2dLayer")
275 {
276  const char* layerName = "DepthwiseConvolution2dLayer";
277  DepthwiseConvolution2dDescriptor descriptor;
278  descriptor.m_PadLeft = 2;
279  descriptor.m_PadRight = 3;
280  descriptor.m_PadBottom = 1;
281  descriptor.m_PadTop = 5;
282  descriptor.m_StrideX = 2;
283  descriptor.m_StrideY = 3;
284  descriptor.m_DataLayout = DataLayout::NHWC;
285 
286  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
287  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
288  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
289 
290  NetworkImpl net;
291 
292  TestConstantLayerVisitor weightsVisitor(weights);
293  TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, layerName);
294 
295  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
296  IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor, layerName);
297  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
298 
299  weightsLayer->ExecuteStrategy(weightsVisitor);
300  layer->ExecuteStrategy(visitor);
301 }
302 
303 TEST_CASE("CheckDepthwiseConvolution2dLayerWithBiases")
304 {
305  DepthwiseConvolution2dDescriptor descriptor;
306  descriptor.m_PadLeft = 2;
307  descriptor.m_PadRight = 3;
308  descriptor.m_PadBottom = 1;
309  descriptor.m_PadTop = 5;
310  descriptor.m_StrideX = 2;
311  descriptor.m_StrideY = 3;
312  descriptor.m_DataLayout = DataLayout::NHWC;
313  descriptor.m_BiasEnabled = true;
314 
315  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
316  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
317  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
318 
319  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
320  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
321  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
322 
323  TestConstantLayerVisitor weightsVisitor(weights);
324  TestConstantLayerVisitor biasesVisitor(biases);
325  TestDepthwiseConvolution2dLayerVisitor visitor(descriptor);
326 
327  NetworkImpl net;
328 
329  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
330  IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
331  IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor);
332  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
333  biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
334 
335  weightsLayer->ExecuteStrategy(weightsVisitor);
336  biasesLayer->ExecuteStrategy(biasesVisitor);
337  layer->ExecuteStrategy(visitor);
338 }
339 
340 TEST_CASE("CheckNamedDepthwiseConvolution2dLayerWithBiases")
341 {
342  const char* layerName = "DepthwiseConvolution2dLayer";
343  DepthwiseConvolution2dDescriptor descriptor;
344  descriptor.m_PadLeft = 2;
345  descriptor.m_PadRight = 3;
346  descriptor.m_PadBottom = 1;
347  descriptor.m_PadTop = 5;
348  descriptor.m_StrideX = 2;
349  descriptor.m_StrideY = 3;
350  descriptor.m_DataLayout = DataLayout::NHWC;
351  descriptor.m_BiasEnabled = true;
352 
353  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
354  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
355  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
356 
357  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
358  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
359  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
360 
361  TestConstantLayerVisitor weightsVisitor(weights);
362  TestConstantLayerVisitor biasesVisitor(biases);
363  TestDepthwiseConvolution2dLayerVisitor visitor(descriptor, layerName);
364 
365  NetworkImpl net;
366 
367  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
368  IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
369  IConnectableLayer* const layer = net.AddDepthwiseConvolution2dLayer(descriptor, layerName);
370  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
371  biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
372 
373  weightsLayer->ExecuteStrategy(weightsVisitor);
374  biasesLayer->ExecuteStrategy(biasesVisitor);
375  layer->ExecuteStrategy(visitor);
376 }
377 
378 TEST_CASE("CheckFullyConnectedLayer")
379 {
380  FullyConnectedDescriptor descriptor;
381  descriptor.m_TransposeWeightMatrix = true;
382  descriptor.m_ConstantWeights = true;
383  descriptor.m_BiasEnabled = false;
384 
385  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
386  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
387  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
388 
389  TestConstantLayerVisitor weightsVisitor(weights);
390  TestFullyConnectedLayerVistor visitor(descriptor);
391 
392  NetworkImpl net;
393 
394  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
395  IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor);
396  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
397 
398  weightsLayer->ExecuteStrategy(weightsVisitor);
399  layer->ExecuteStrategy(visitor);
400 }
401 
402 TEST_CASE("CheckNamedFullyConnectedLayer")
403 {
404  const char* layerName = "FullyConnectedLayer";
405  FullyConnectedDescriptor descriptor;
406  descriptor.m_TransposeWeightMatrix = true;
407  descriptor.m_ConstantWeights = true;
408  descriptor.m_BiasEnabled = false;
409 
410  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
411  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
412  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
413 
414  TestConstantLayerVisitor weightsVisitor(weights);
415  TestFullyConnectedLayerVistor visitor(descriptor, layerName);
416 
417  NetworkImpl net;
418 
419  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
420  IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, layerName);
421  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
422 
423  weightsLayer->ExecuteStrategy(weightsVisitor);
424  layer->ExecuteStrategy(visitor);
425 }
426 
427 TEST_CASE("CheckFullyConnectedLayerWithBiases")
428 {
429  FullyConnectedDescriptor descriptor;
430  descriptor.m_TransposeWeightMatrix = true;
431  descriptor.m_ConstantWeights = true;
432  descriptor.m_BiasEnabled = true;
433 
434  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
435  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
436  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
437 
438  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
439  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
440  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
441 
442  TestConstantLayerVisitor weightsVisitor(weights);
443  TestConstantLayerVisitor biasesVisitor(biases);
444  TestFullyConnectedLayerVistor visitor(descriptor);
445 
446  NetworkImpl net;
447 
448  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
449  IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
450  IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor);
451  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
452  biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
453 
454  weightsLayer->ExecuteStrategy(weightsVisitor);
455  biasesLayer->ExecuteStrategy(biasesVisitor);
456  layer->ExecuteStrategy(visitor);
457 }
458 
459 TEST_CASE("CheckNamedFullyConnectedLayerWithBiases")
460 {
461  const char* layerName = "FullyConnectedLayer";
462  FullyConnectedDescriptor descriptor;
463  descriptor.m_TransposeWeightMatrix = true;
464  descriptor.m_ConstantWeights = true;
465  descriptor.m_BiasEnabled = true;
466 
467  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
468  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
469  ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
470 
471  std::vector<float> biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
472  std::vector<unsigned int> biasDimensions = {1, 1, 3, 3};
473  ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32, 0.0f, 0, true), biasData);
474 
475  TestConstantLayerVisitor weightsVisitor(weights);
476  TestConstantLayerVisitor biasesVisitor(biases);
477  TestFullyConnectedLayerVistor visitor(descriptor, layerName);
478 
479  NetworkImpl net;
480 
481  IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights);
482  IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases);
483  IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, layerName);
484  weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
485  biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2));
486 
487  weightsLayer->ExecuteStrategy(weightsVisitor);
488  biasesLayer->ExecuteStrategy(biasesVisitor);
489  layer->ExecuteStrategy(visitor);
490 }
491 
492 TEST_CASE("CheckBatchNormalizationLayer")
493 {
494  BatchNormalizationDescriptor descriptor;
495  descriptor.m_Eps = 0.0002f;
496  descriptor.m_DataLayout = DataLayout::NHWC;
497 
498  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
499  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
500  ConstTensor mean(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
501 
502  std::vector<float> varianceData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
503  std::vector<unsigned int> varianceDimensions = {1, 1, 3, 3};
504  ConstTensor variance(TensorInfo(4, varianceDimensions.data(), DataType::Float32, 0.0f, 0, true), varianceData);
505 
506  std::vector<float> betaData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
507  std::vector<unsigned int> betaDimensions = {1, 1, 3, 3};
508  ConstTensor beta(TensorInfo(4, betaDimensions.data(), DataType::Float32, 0.0f, 0, true), betaData);
509 
510  std::vector<float> gammaData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
511  std::vector<unsigned int> gammaDimensions = {1, 1, 3, 3};
512  ConstTensor gamma(TensorInfo(4, gammaDimensions.data(), DataType::Float32, 0.0f, 0, true), gammaData);
513 
514  TestBatchNormalizationLayerVisitor visitor(descriptor, mean, variance, beta, gamma);
515 
516  NetworkImpl net;
517 
518  IConnectableLayer* const layer = net.AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma);
519  layer->ExecuteStrategy(visitor);
520 }
521 
522 TEST_CASE("CheckNamedBatchNormalizationLayer")
523 {
524  const char* layerName = "BatchNormalizationLayer";
525  BatchNormalizationDescriptor descriptor;
526  descriptor.m_Eps = 0.0002f;
527  descriptor.m_DataLayout = DataLayout::NHWC;
528 
529  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
530  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
531  ConstTensor mean(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
532 
533  std::vector<float> varianceData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
534  std::vector<unsigned int> varianceDimensions = {1, 1, 3, 3};
535  ConstTensor variance(TensorInfo(4, varianceDimensions.data(), DataType::Float32, 0.0f, 0, true), varianceData);
536 
537  std::vector<float> betaData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
538  std::vector<unsigned int> betaDimensions = {1, 1, 3, 3};
539  ConstTensor beta(TensorInfo(4, betaDimensions.data(), DataType::Float32, 0.0f, 0, true), betaData);
540 
541  std::vector<float> gammaData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
542  std::vector<unsigned int> gammaDimensions = {1, 1, 3, 3};
543  ConstTensor gamma(TensorInfo(4, gammaDimensions.data(), DataType::Float32, 0.0f, 0, true), gammaData);
544 
545  TestBatchNormalizationLayerVisitor visitor(descriptor, mean, variance, beta, gamma, layerName);
546 
547  NetworkImpl net;
548 
549  IConnectableLayer* const layer = net.AddBatchNormalizationLayer(
550  descriptor, mean, variance, beta, gamma, layerName);
551  layer->ExecuteStrategy(visitor);
552 }
553 
554 TEST_CASE("CheckConstLayer")
555 {
556  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
557  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
558  ConstTensor input(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
559 
560  TestConstantLayerVisitor visitor(input);
561 
562  NetworkImpl net;
563 
564  IConnectableLayer* const layer = net.AddConstantLayer(input);
565  layer->ExecuteStrategy(visitor);
566 }
567 
568 TEST_CASE("CheckNamedConstLayer")
569 {
570  const char* layerName = "ConstantLayer";
571  std::vector<float> data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
572  std::vector<unsigned int> dimensions = {1, 1, 3, 3};
573  ConstTensor input(TensorInfo(4, dimensions.data(), DataType::Float32, 0.0f, 0, true), data);
574 
575  TestConstantLayerVisitor visitor(input, layerName);
576 
577  NetworkImpl net;
578 
579  IConnectableLayer* const layer = net.AddConstantLayer(input, layerName);
580  layer->ExecuteStrategy(visitor);
581 }
582 
583 TEST_CASE("CheckLstmLayerBasic")
584 {
585  LstmDescriptor descriptor;
586  descriptor.m_ActivationFunc = 3;
587  descriptor.m_ClippingThresProj = 0.5f;
588  descriptor.m_ClippingThresCell = 0.3f;
589  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
590 
591  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
592  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
593  ConstTensor inputToForgetWeights(
594  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
595  inputToForgetWeightsData);
596 
597  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
598  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
599  ConstTensor inputToCellWeights(
600  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
601  inputToCellWeightsData);
602 
603  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
604  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
605  ConstTensor inputToOutputWeights(
606  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
607  inputToOutputWeightsData);
608 
609  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
610  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
611  ConstTensor recurrentToForgetWeights(
612  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
613  recurrentToForgetWeightsData);
614 
615  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
616  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
617  ConstTensor recurrentToCellWeights(
618  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
619  recurrentToCellWeightsData);
620 
621  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
622  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
623  ConstTensor recurrentToOutputWeights(
624  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
625  recurrentToOutputWeightsData);
626 
627  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
628  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
629  ConstTensor forgetGateBias(
630  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
631  forgetGateBiasData);
632 
633  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
634  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
635  ConstTensor cellBias(
636  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
637  cellBiasData);
638 
639  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
640  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
641  ConstTensor outputGateBias(
642  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
643  outputGateBiasData);
644 
645  LstmInputParams params;
646  params.m_InputToForgetWeights = &inputToForgetWeights;
647  params.m_InputToCellWeights = &inputToCellWeights;
648  params.m_InputToOutputWeights = &inputToOutputWeights;
649  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
650  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
651  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
652  params.m_ForgetGateBias = &forgetGateBias;
653  params.m_CellBias = &cellBias;
654  params.m_OutputGateBias = &outputGateBias;
655 
656  TestLstmLayerVisitor visitor(descriptor, params);
657 
658  NetworkImpl net;
659 
660  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
661  layer->ExecuteStrategy(visitor);
662 }
663 
664 TEST_CASE("CheckNamedLstmLayerBasic")
665 {
666  const char* layerName = "LstmLayer";
667  LstmDescriptor descriptor;
668  descriptor.m_ActivationFunc = 3;
669  descriptor.m_ClippingThresProj = 0.5f;
670  descriptor.m_ClippingThresCell = 0.3f;
671  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
672 
673  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
674  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
675  ConstTensor inputToForgetWeights(
676  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
677  inputToForgetWeightsData);
678 
679  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
680  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
681  ConstTensor inputToCellWeights(
682  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
683  inputToCellWeightsData);
684 
685  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
686  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
687  ConstTensor inputToOutputWeights(
688  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
689  inputToOutputWeightsData);
690 
691  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
692  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
693  ConstTensor recurrentToForgetWeights(
694  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
695  recurrentToForgetWeightsData);
696 
697  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
698  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
699  ConstTensor recurrentToCellWeights(
700  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
701  recurrentToCellWeightsData);
702 
703  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
704  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
705  ConstTensor recurrentToOutputWeights(
706  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
707  recurrentToOutputWeightsData);
708 
709  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
710  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
711  ConstTensor forgetGateBias(
712  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
713  forgetGateBiasData);
714 
715  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
716  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
717  ConstTensor cellBias(
718  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
719  cellBiasData);
720 
721  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
722  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
723  ConstTensor outputGateBias(
724  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
725  outputGateBiasData);
726 
727  LstmInputParams params;
728  params.m_InputToForgetWeights = &inputToForgetWeights;
729  params.m_InputToCellWeights = &inputToCellWeights;
730  params.m_InputToOutputWeights = &inputToOutputWeights;
731  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
732  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
733  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
734  params.m_ForgetGateBias = &forgetGateBias;
735  params.m_CellBias = &cellBias;
736  params.m_OutputGateBias = &outputGateBias;
737 
738  TestLstmLayerVisitor visitor(descriptor, params, layerName);
739 
740  NetworkImpl net;
741 
742  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params, layerName);
743  layer->ExecuteStrategy(visitor);
744 }
745 
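// With CIFG disabled, the input-gate weights and bias become mandatory and are checked in addition to the basic parameters.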
746 TEST_CASE("CheckLstmLayerCifgDisabled")
747 {
748  LstmDescriptor descriptor;
749  descriptor.m_ActivationFunc = 3;
750  descriptor.m_ClippingThresProj = 0.5f;
751  descriptor.m_ClippingThresCell = 0.3f;
752  descriptor.m_CifgEnabled = false; // CIFG is disabled, so the optional CIFG parameters (input-gate weights and bias) must be provided
753 
754  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
755  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
756  ConstTensor inputToForgetWeights(
757  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
758  inputToForgetWeightsData);
759 
760  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
761  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
762  ConstTensor inputToCellWeights(
763  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
764  inputToCellWeightsData);
765 
766  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
767  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
768  ConstTensor inputToOutputWeights(
769  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
770  inputToOutputWeightsData);
771 
772  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
773  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
774  ConstTensor recurrentToForgetWeights(
775  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
776  recurrentToForgetWeightsData);
777 
778  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
779  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
780  ConstTensor recurrentToCellWeights(
781  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
782  recurrentToCellWeightsData);
783 
784  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
785  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
786  ConstTensor recurrentToOutputWeights(
787  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
788  recurrentToOutputWeightsData);
789 
790  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
791  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
792  ConstTensor forgetGateBias(
793  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
794  forgetGateBiasData);
795 
796  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
797  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
798  ConstTensor cellBias(
799  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
800  cellBiasData);
801 
802  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
803  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
804  ConstTensor outputGateBias(
805  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
806  outputGateBiasData);
807 
808  std::vector<float> inputToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
809  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
810  ConstTensor inputToInputWeights(
811  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
812  inputToInputWeightsData);
813 
814  std::vector<float> recurrentToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
815  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
816  ConstTensor recurrentToInputWeights(
817  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
818  recurrentToInputWeightsData);
819 
820  std::vector<float> inputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
821  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
822  ConstTensor inputGateBias(
823  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
824  inputGateBiasData);
825 
826  LstmInputParams params;
827  params.m_InputToForgetWeights = &inputToForgetWeights;
828  params.m_InputToCellWeights = &inputToCellWeights;
829  params.m_InputToOutputWeights = &inputToOutputWeights;
830  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
831  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
832  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
833  params.m_ForgetGateBias = &forgetGateBias;
834  params.m_CellBias = &cellBias;
835  params.m_OutputGateBias = &outputGateBias;
836 
837  params.m_InputToInputWeights = &inputToInputWeights;
838  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
839  params.m_InputGateBias = &inputGateBias;
840 
841  TestLstmLayerVisitor visitor(descriptor, params);
842 
843  NetworkImpl net;
844 
845  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
846  layer->ExecuteStrategy(visitor);
847 }
848 
849 TEST_CASE("CheckNamedLstmLayerCifgDisabled")
850 {
851  const char* layerName = "LstmLayer";
852  LstmDescriptor descriptor;
853  descriptor.m_ActivationFunc = 3;
854  descriptor.m_ClippingThresProj = 0.5f;
855  descriptor.m_ClippingThresCell = 0.3f;
856  descriptor.m_CifgEnabled = false; // CIFG is disabled, so the optional CIFG parameters (input-gate weights and bias) must be provided
857 
858  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
859  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
860  ConstTensor inputToForgetWeights(
861  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
862  inputToForgetWeightsData);
863 
864  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
865  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
866  ConstTensor inputToCellWeights(
867  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
868  inputToCellWeightsData);
869 
870  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
871  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
872  ConstTensor inputToOutputWeights(
873  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
874  inputToOutputWeightsData);
875 
876  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
877  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
878  ConstTensor recurrentToForgetWeights(
879  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
880  recurrentToForgetWeightsData);
881 
882  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
883  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
884  ConstTensor recurrentToCellWeights(
885  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
886  recurrentToCellWeightsData);
887 
888  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
889  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
890  ConstTensor recurrentToOutputWeights(
891  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
892  recurrentToOutputWeightsData);
893 
894  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
895  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
896  ConstTensor forgetGateBias(
897  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
898  forgetGateBiasData);
899 
900  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
901  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
902  ConstTensor cellBias(
903  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
904  cellBiasData);
905 
906  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
907  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
908  ConstTensor outputGateBias(
909  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
910  outputGateBiasData);
911 
912  std::vector<float> inputToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
913  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
914  ConstTensor inputToInputWeights(
915  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
916  inputToInputWeightsData);
917 
918  std::vector<float> recurrentToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
919  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
920  ConstTensor recurrentToInputWeights(
921  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
922  recurrentToInputWeightsData);
923 
924  std::vector<float> inputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
925  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
926  ConstTensor inputGateBias(
927  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
928  inputGateBiasData);
929 
930  LstmInputParams params;
931  params.m_InputToForgetWeights = &inputToForgetWeights;
932  params.m_InputToCellWeights = &inputToCellWeights;
933  params.m_InputToOutputWeights = &inputToOutputWeights;
934  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
935  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
936  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
937  params.m_ForgetGateBias = &forgetGateBias;
938  params.m_CellBias = &cellBias;
939  params.m_OutputGateBias = &outputGateBias;
940 
941  params.m_InputToInputWeights = &inputToInputWeights;
942  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
943  params.m_InputGateBias = &inputGateBias;
944 
945  TestLstmLayerVisitor visitor(descriptor, params, layerName);
946 
947  NetworkImpl net;
948 
949  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params, layerName);
950  layer->ExecuteStrategy(visitor);
951 }
952 
953 // Checks an LSTM layer with peephole connections enabled.
954 TEST_CASE("CheckLstmLayerPeephole")
955 {
956  LstmDescriptor descriptor;
957  descriptor.m_ActivationFunc = 3;
958  descriptor.m_ClippingThresProj = 0.5f;
959  descriptor.m_ClippingThresCell = 0.3f;
960  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
961  descriptor.m_PeepholeEnabled = true;
962 
963  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
964  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
965  ConstTensor inputToForgetWeights(
966  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
967  inputToForgetWeightsData);
968 
969  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
970  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
971  ConstTensor inputToCellWeights(
972  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
973  inputToCellWeightsData);
974 
975  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
976  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
977  ConstTensor inputToOutputWeights(
978  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
979  inputToOutputWeightsData);
980 
981  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
982  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
983  ConstTensor recurrentToForgetWeights(
984  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
985  recurrentToForgetWeightsData);
986 
987  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
988  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
989  ConstTensor recurrentToCellWeights(
990  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
991  recurrentToCellWeightsData);
992 
993  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
994  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
995  ConstTensor recurrentToOutputWeights(
996  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
997  recurrentToOutputWeightsData);
998 
999  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1000  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1001  ConstTensor forgetGateBias(
1002  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1003  forgetGateBiasData);
1004 
1005  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1006  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1007  ConstTensor cellBias(
1008  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1009  cellBiasData);
1010 
1011  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1012  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1013  ConstTensor outputGateBias(
1014  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1015  outputGateBiasData);
1016 
1017  std::vector<float> cellToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1018  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1019  ConstTensor cellToForgetWeights(
1020  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1021  cellToForgetWeightsData);
1022 
1023  std::vector<float> cellToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1024  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1025  ConstTensor cellToOutputWeights(
1026  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1027  cellToOutputWeightsData);
1028 
1029  LstmInputParams params;
1030  params.m_InputToForgetWeights = &inputToForgetWeights;
1031  params.m_InputToCellWeights = &inputToCellWeights;
1032  params.m_InputToOutputWeights = &inputToOutputWeights;
1033  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1034  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1035  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1036  params.m_ForgetGateBias = &forgetGateBias;
1037  params.m_CellBias = &cellBias;
1038  params.m_OutputGateBias = &outputGateBias;
1039 
1040  params.m_CellToForgetWeights = &cellToForgetWeights;
1041  params.m_CellToOutputWeights = &cellToOutputWeights;
1042 
1043  TestLstmLayerVisitor visitor(descriptor, params);
1044 
1045  NetworkImpl net;
1046 
1047  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
1048  layer->ExecuteStrategy(visitor);
1049 }
1050 
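// Peephole connections combined with CIFG disabled: the cell-to-input peephole weights apply only when the input gate exists (CIFG disabled).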
1051 TEST_CASE("CheckLstmLayerPeepholeCifgDisabled")
1052 {
1053  LstmDescriptor descriptor;
1054  descriptor.m_ActivationFunc = 3;
1055  descriptor.m_ClippingThresProj = 0.5f;
1056  descriptor.m_ClippingThresCell = 0.3f;
1057  descriptor.m_CifgEnabled = false;
1058  descriptor.m_PeepholeEnabled = true;
1059 
1060  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1061  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1062  ConstTensor inputToForgetWeights(
1063  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1064  inputToForgetWeightsData);
1065 
1066  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1067  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1068  ConstTensor inputToCellWeights(
1069  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1070  inputToCellWeightsData);
1071 
1072  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1073  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1074  ConstTensor inputToOutputWeights(
1075  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1076  inputToOutputWeightsData);
1077 
1078  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1079  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1080  ConstTensor recurrentToForgetWeights(
1081  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1082  recurrentToForgetWeightsData);
1083 
1084  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1085  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1086  ConstTensor recurrentToCellWeights(
1087  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1088  recurrentToCellWeightsData);
1089 
1090  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1091  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1092  ConstTensor recurrentToOutputWeights(
1093  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1094  recurrentToOutputWeightsData);
1095 
1096  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1097  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1098  ConstTensor forgetGateBias(
1099  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1100  forgetGateBiasData);
1101 
1102  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1103  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1104  ConstTensor cellBias(
1105  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1106  cellBiasData);
1107 
1108  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1109  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1110  ConstTensor outputGateBias(
1111  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1112  outputGateBiasData);
1113 
1114  std::vector<float> cellToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1115  std::vector<unsigned int> cellToInputWeightsDimensions = {1, 1, 3, 3};
1116  ConstTensor cellToInputWeights(
1117  TensorInfo(4, cellToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1118  cellToInputWeightsData);
1119 
1120  std::vector<float> cellToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1121  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1122  ConstTensor cellToForgetWeights(
1123  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1124  cellToForgetWeightsData);
1125 
1126  std::vector<float> cellToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1127  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1128  ConstTensor cellToOutputWeights(
1129  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1130  cellToOutputWeightsData);
1131 
1132  std::vector<float> inputToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1133  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
1134  ConstTensor inputToInputWeights(
1135  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1136  inputToInputWeightsData);
1137 
1138  std::vector<float> recurrentToInputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1139  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
1140  ConstTensor recurrentToInputWeights(
1141  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1142  recurrentToInputWeightsData);
1143 
1144  std::vector<float> inputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1145  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
1146  ConstTensor inputGateBias(
1147  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1148  inputGateBiasData);
1149 
1150  LstmInputParams params;
1151  // Basic params
1152  params.m_InputToForgetWeights = &inputToForgetWeights;
1153  params.m_InputToCellWeights = &inputToCellWeights;
1154  params.m_InputToOutputWeights = &inputToOutputWeights;
1155  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1156  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1157  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1158  params.m_ForgetGateBias = &forgetGateBias;
1159  params.m_CellBias = &cellBias;
1160  params.m_OutputGateBias = &outputGateBias;
1161 
1162  // Peephole params
1163  params.m_CellToInputWeights = &cellToInputWeights;
1164  params.m_CellToForgetWeights = &cellToForgetWeights;
1165  params.m_CellToOutputWeights = &cellToOutputWeights;
1166 
1167  // Cifg params
1168  params.m_InputToInputWeights = &inputToInputWeights;
1169  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
1170  params.m_InputGateBias = &inputGateBias;
1171 
1172  TestLstmLayerVisitor visitor(descriptor, params);
1173 
1174  NetworkImpl net;
1175 
1176  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
1177  layer->ExecuteStrategy(visitor);
1178 }
1179 
1180 TEST_CASE("CheckNamedLstmLayerPeephole")
1181 {
1182  const char* layerName = "LstmLayer";
1183  LstmDescriptor descriptor;
1184  descriptor.m_ActivationFunc = 3;
1185  descriptor.m_ClippingThresProj = 0.5f;
1186  descriptor.m_ClippingThresCell = 0.3f;
1187  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
1188  descriptor.m_PeepholeEnabled = true;
1189 
1190  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1191  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1192  ConstTensor inputToForgetWeights(
1193  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1194  inputToForgetWeightsData);
1195 
1196  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1197  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1198  ConstTensor inputToCellWeights(
1199  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1200  inputToCellWeightsData);
1201 
1202  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1203  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1204  ConstTensor inputToOutputWeights(
1205  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1206  inputToOutputWeightsData);
1207 
1208  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1209  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1210  ConstTensor recurrentToForgetWeights(
1211  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1212  recurrentToForgetWeightsData);
1213 
1214  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1215  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1216  ConstTensor recurrentToCellWeights(
1217  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1218  recurrentToCellWeightsData);
1219 
1220  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1221  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1222  ConstTensor recurrentToOutputWeights(
1223  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1224  recurrentToOutputWeightsData);
1225 
1226  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1227  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1228  ConstTensor forgetGateBias(
1229  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1230  forgetGateBiasData);
1231 
1232  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1233  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1234  ConstTensor cellBias(
1235  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1236  cellBiasData);
1237 
1238  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1239  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1240  ConstTensor outputGateBias(
1241  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1242  outputGateBiasData);
1243 
1244  std::vector<float> cellToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1245  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1246  ConstTensor cellToForgetWeights(
1247  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1248  cellToForgetWeightsData);
1249 
1250  std::vector<float> cellToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1251  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1252  ConstTensor cellToOutputWeights(
1253  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1254  cellToOutputWeightsData);
1255 
1256  LstmInputParams params;
1257  params.m_InputToForgetWeights = &inputToForgetWeights;
1258  params.m_InputToCellWeights = &inputToCellWeights;
1259  params.m_InputToOutputWeights = &inputToOutputWeights;
1260  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1261  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1262  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1263  params.m_ForgetGateBias = &forgetGateBias;
1264  params.m_CellBias = &cellBias;
1265  params.m_OutputGateBias = &outputGateBias;
1266 
1267  params.m_CellToForgetWeights = &cellToForgetWeights;
1268  params.m_CellToOutputWeights = &cellToOutputWeights;
1269 
1270  TestLstmLayerVisitor visitor(descriptor, params, layerName);
1271 
1272  NetworkImpl net;
1273 
1274  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params, layerName);
1275  layer->ExecuteStrategy(visitor);
1276 }
1277 
1278 // Checks an LSTM layer with the projection layer enabled.
1279 TEST_CASE("CheckLstmLayerProjection")
1280 {
1281  LstmDescriptor descriptor;
1282  descriptor.m_ActivationFunc = 3;
1283  descriptor.m_ClippingThresProj = 0.5f;
1284  descriptor.m_ClippingThresCell = 0.3f;
1285  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
1286  descriptor.m_ProjectionEnabled = true;
1287 
1288  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1289  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1290  ConstTensor inputToForgetWeights(
1291  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1292  inputToForgetWeightsData);
1293 
1294  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1295  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1296  ConstTensor inputToCellWeights(
1297  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1298  inputToCellWeightsData);
1299 
1300  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1301  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1302  ConstTensor inputToOutputWeights(
1303  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1304  inputToOutputWeightsData);
1305 
1306  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1307  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1308  ConstTensor recurrentToForgetWeights(
1309  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1310  recurrentToForgetWeightsData);
1311 
1312  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1313  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1314  ConstTensor recurrentToCellWeights(
1315  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1316  recurrentToCellWeightsData);
1317 
1318  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1319  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1320  ConstTensor recurrentToOutputWeights(
1321  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1322  recurrentToOutputWeightsData);
1323 
1324  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1325  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1326  ConstTensor forgetGateBias(
1327  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1328  forgetGateBiasData);
1329 
1330  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1331  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1332  ConstTensor cellBias(
1333  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1334  cellBiasData);
1335 
1336  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1337  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1338  ConstTensor outputGateBias(
1339  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1340  outputGateBiasData);
1341 
1342  std::vector<float> projectionBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1343  std::vector<unsigned int> projectionBiasDimensions = {1, 1, 3, 3};
1344  ConstTensor projectionBias(
1345  TensorInfo(4, projectionBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1346  projectionBiasData);
1347 
1348  std::vector<float> projectionWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1349  std::vector<unsigned int> projectionWeightsDimensions = {1, 1, 3, 3};
1350  ConstTensor projectionWeights(
1351  TensorInfo(4, projectionWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1352  projectionWeightsData);
1353 
1354  LstmInputParams params;
1355  params.m_InputToForgetWeights = &inputToForgetWeights;
1356  params.m_InputToCellWeights = &inputToCellWeights;
1357  params.m_InputToOutputWeights = &inputToOutputWeights;
1358  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1359  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1360  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1361  params.m_ForgetGateBias = &forgetGateBias;
1362  params.m_CellBias = &cellBias;
1363  params.m_OutputGateBias = &outputGateBias;
1364 
1365  params.m_ProjectionWeights = &projectionWeights;
1366  params.m_ProjectionBias = &projectionBias;
1367 
1368  TestLstmLayerVisitor visitor(descriptor, params);
1369 
1370  NetworkImpl net;
1371 
1372  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params);
1373  layer->ExecuteStrategy(visitor);
1374 }
1375 
1376 TEST_CASE("CheckNamedLstmLayerProjection")
1377 {
1378  const char* layerName = "LstmLayer";
1379  LstmDescriptor descriptor;
1380  descriptor.m_ActivationFunc = 3;
1381  descriptor.m_ClippingThresProj = 0.5f;
1382  descriptor.m_ClippingThresCell = 0.3f;
1383  descriptor.m_CifgEnabled = true; // if this is true then we DON'T need to set the OptCifgParams
1384  descriptor.m_ProjectionEnabled = true;
1385 
1386  std::vector<float> inputToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1387  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1388  ConstTensor inputToForgetWeights(
1389  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1390  inputToForgetWeightsData);
1391 
1392  std::vector<float> inputToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1393  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1394  ConstTensor inputToCellWeights(
1395  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1396  inputToCellWeightsData);
1397 
1398  std::vector<float> inputToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1399  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1400  ConstTensor inputToOutputWeights(
1401  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1402  inputToOutputWeightsData);
1403 
1404  std::vector<float> recurrentToForgetWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1405  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1406  ConstTensor recurrentToForgetWeights(
1407  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1408  recurrentToForgetWeightsData);
1409 
1410  std::vector<float> recurrentToCellWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1411  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1412  ConstTensor recurrentToCellWeights(
1413  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1414  recurrentToCellWeightsData);
1415 
1416  std::vector<float> recurrentToOutputWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1417  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1418  ConstTensor recurrentToOutputWeights(
1419  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1420  recurrentToOutputWeightsData);
1421 
1422  std::vector<float> forgetGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1423  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1424  ConstTensor forgetGateBias(
1425  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1426  forgetGateBiasData);
1427 
1428  std::vector<float> cellBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1429  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1430  ConstTensor cellBias(
1431  TensorInfo(4, cellBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1432  cellBiasData);
1433 
1434  std::vector<float> outputGateBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1435  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1436  ConstTensor outputGateBias(
1437  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1438  outputGateBiasData);
1439 
1440  std::vector<float> projectionBiasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1441  std::vector<unsigned int> projectionBiasDimensions = {1, 1, 3, 3};
1442  ConstTensor projectionBias(
1443  TensorInfo(4, projectionBiasDimensions.data(), DataType::Float32, 0.0f, 0, true),
1444  projectionBiasData);
1445 
1446  std::vector<float> projectionWeightsData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
1447  std::vector<unsigned int> projectionWeightsDimensions = {1, 1, 3, 3};
1448  ConstTensor projectionWeights(
1449  TensorInfo(4, projectionWeightsDimensions.data(), DataType::Float32, 0.0f, 0, true),
1450  projectionWeightsData);
1451 
1452  LstmInputParams params;
1453  params.m_InputToForgetWeights = &inputToForgetWeights;
1454  params.m_InputToCellWeights = &inputToCellWeights;
1455  params.m_InputToOutputWeights = &inputToOutputWeights;
1456  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1457  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1458  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1459  params.m_ForgetGateBias = &forgetGateBias;
1460  params.m_CellBias = &cellBias;
1461  params.m_OutputGateBias = &outputGateBias;
1462 
1463  params.m_ProjectionWeights = &projectionWeights;
1464  params.m_ProjectionBias = &projectionBias;
1465 
1466  TestLstmLayerVisitor visitor(descriptor, params, layerName);
1467 
1468  NetworkImpl net;
1469 
1470  IConnectableLayer* const layer = net.AddLstmLayer(descriptor, params, layerName);
1471  layer->ExecuteStrategy(visitor);
1472 }
1473 
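// The QLstm variants use quantized weights (QSymmS8) and 32-bit integer biases (Signed32) in place of the Float32 tensors used above.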
1474 TEST_CASE("CheckQLstmLayerBasic")
1475 {
1476  QLstmDescriptor descriptor;
1477  descriptor.m_ProjectionClip = 0.5f;
1478  descriptor.m_CellClip = 0.3f;
1479  descriptor.m_CifgEnabled = true;
1480 
1481  // Basic params ONLY
1482  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1483  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1484  ConstTensor inputToForgetWeights(
1485  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1486  inputToForgetWeightsData);
1487 
1488  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1489  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1490  ConstTensor inputToCellWeights(
1491  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1492  inputToCellWeightsData);
1493 
1494  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1495  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1496  ConstTensor inputToOutputWeights(
1497  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1498  inputToOutputWeightsData);
1499 
1500  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1501  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1502  ConstTensor recurrentToForgetWeights(
1503  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1504  recurrentToForgetWeightsData);
1505 
1506  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1507  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1508  ConstTensor recurrentToCellWeights(
1509  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1510  recurrentToCellWeightsData);
1511 
1512  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1513  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1514  ConstTensor recurrentToOutputWeights(
1515  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1516  recurrentToOutputWeightsData);
1517 
1518  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1519  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1520  ConstTensor forgetGateBias(
1521  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1522  forgetGateBiasData);
1523 
1524  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1525  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1526  ConstTensor cellBias(
1527  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1528  cellBiasData);
1529 
1530  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1531  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1532  ConstTensor outputGateBias(
1533  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1534  outputGateBiasData);
1535 
1536  LstmInputParams params;
1537  params.m_InputToForgetWeights = &inputToForgetWeights;
1538  params.m_InputToCellWeights = &inputToCellWeights;
1539  params.m_InputToOutputWeights = &inputToOutputWeights;
1540  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1541  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1542  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1543  params.m_ForgetGateBias = &forgetGateBias;
1544  params.m_CellBias = &cellBias;
1545  params.m_OutputGateBias = &outputGateBias;
1546 
1547  TestQLstmLayerVisitor visitor(descriptor, params);
1548 
1549  NetworkImpl net;
1550 
1551  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
1552  layer->ExecuteStrategy(visitor);
1553 }
1554 
1555 TEST_CASE("CheckNamedQLstmLayerBasic")
1556 {
1557  const char* layerName = "QLstmLayer";
1558  QLstmDescriptor descriptor;
1559  descriptor.m_ProjectionClip = 0.5f;
1560  descriptor.m_CellClip = 0.3f;
1561  descriptor.m_CifgEnabled = true;
1562 
1563  // Basic params ONLY
1564  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1565  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1566  ConstTensor inputToForgetWeights(
1567  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1568  inputToForgetWeightsData);
1569 
1570  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1571  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1572  ConstTensor inputToCellWeights(
1573  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1574  inputToCellWeightsData);
1575 
1576  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1577  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1578  ConstTensor inputToOutputWeights(
1579  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1580  inputToOutputWeightsData);
1581 
1582  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1583  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1584  ConstTensor recurrentToForgetWeights(
1585  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1586  recurrentToForgetWeightsData);
1587 
1588  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1589  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1590  ConstTensor recurrentToCellWeights(
1591  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1592  recurrentToCellWeightsData);
1593 
1594  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1595  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1596  ConstTensor recurrentToOutputWeights(
1597  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1598  recurrentToOutputWeightsData);
1599 
1600  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1601  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1602  ConstTensor forgetGateBias(
1603  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1604  forgetGateBiasData);
1605 
1606  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1607  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1608  ConstTensor cellBias(
1609  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1610  cellBiasData);
1611 
1612  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1613  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1614  ConstTensor outputGateBias(
1615  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1616  outputGateBiasData);
1617 
1618  LstmInputParams params;
1619  params.m_InputToForgetWeights = &inputToForgetWeights;
1620  params.m_InputToCellWeights = &inputToCellWeights;
1621  params.m_InputToOutputWeights = &inputToOutputWeights;
1622  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1623  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1624  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1625  params.m_ForgetGateBias = &forgetGateBias;
1626  params.m_CellBias = &cellBias;
1627  params.m_OutputGateBias = &outputGateBias;
1628 
1629  TestQLstmLayerVisitor visitor(descriptor, params, layerName);
1630 
1631  NetworkImpl net;
1632 
1633  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params, layerName);
1634  layer->ExecuteStrategy(visitor);
1635 }
1636 
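// As with the float LSTM, disabling CIFG makes the input-gate weights and bias mandatory.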
1637 TEST_CASE("CheckQLstmLayerCifgDisabled")
1638 {
1639  QLstmDescriptor descriptor;
1640  descriptor.m_ProjectionClip = 0.5f;
1641  descriptor.m_CellClip = 0.3f;
1642  descriptor.m_CifgEnabled = false;
1643 
1644  // Basic params
1645  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1646  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1647  ConstTensor inputToForgetWeights(
1648  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1649  inputToForgetWeightsData);
1650 
1651  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1652  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1653  ConstTensor inputToCellWeights(
1654  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1655  inputToCellWeightsData);
1656 
1657  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1658  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1659  ConstTensor inputToOutputWeights(
1660  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1661  inputToOutputWeightsData);
1662 
1663  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1664  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1665  ConstTensor recurrentToForgetWeights(
1666  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1667  recurrentToForgetWeightsData);
1668 
1669  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1670  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1671  ConstTensor recurrentToCellWeights(
1672  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1673  recurrentToCellWeightsData);
1674 
1675  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1676  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1677  ConstTensor recurrentToOutputWeights(
1678  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1679  recurrentToOutputWeightsData);
1680 
1681  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1682  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1683  ConstTensor forgetGateBias(
1684  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1685  forgetGateBiasData);
1686 
1687  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1688  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1689  ConstTensor cellBias(
1690  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1691  cellBiasData);
1692 
1693  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1694  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1695  ConstTensor outputGateBias(
1696  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1697  outputGateBiasData);
1698 
1699  // CIFG disabled params
1700  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1701  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
1702  ConstTensor inputToInputWeights(
1703  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1704  inputToInputWeightsData);
1705 
1706  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1707  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
1708  ConstTensor recurrentToInputWeights(
1709  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1710  recurrentToInputWeightsData);
1711 
1712  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1713  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
1714  ConstTensor inputGateBias(
1715  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1716  inputGateBiasData);
1717 
1718  LstmInputParams params;
1719 
1720  // Basic params
1721  params.m_InputToForgetWeights = &inputToForgetWeights;
1722  params.m_InputToCellWeights = &inputToCellWeights;
1723  params.m_InputToOutputWeights = &inputToOutputWeights;
1724  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1725  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1726  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1727  params.m_ForgetGateBias = &forgetGateBias;
1728  params.m_CellBias = &cellBias;
1729  params.m_OutputGateBias = &outputGateBias;
1730 
1731  // CIFG disabled params
1732  params.m_InputToInputWeights = &inputToInputWeights;
1733  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
1734  params.m_InputGateBias = &inputGateBias;
1735 
1736  TestQLstmLayerVisitor visitor(descriptor, params);
1737 
1738  NetworkImpl net;
1739 
1740  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
1741  layer->ExecuteStrategy(visitor);
1742 }
1743 
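// Combines the CIFG-disabled parameters with peephole connections on the quantized LSTM layer.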
1744 TEST_CASE("CheckQLstmLayerCifgDisabledPeepholeEnabled")
1745 {
1746  QLstmDescriptor descriptor;
1747  descriptor.m_ProjectionClip = 0.5f;
1748  descriptor.m_CellClip = 0.3f;
1749  descriptor.m_CifgEnabled = false;
1750  descriptor.m_PeepholeEnabled = true;
1751 
1752  // Basic params
1753  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1754  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1755  ConstTensor inputToForgetWeights(
1756  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1757  inputToForgetWeightsData);
1758 
1759  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1760  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1761  ConstTensor inputToCellWeights(
1762  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1763  inputToCellWeightsData);
1764 
1765  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1766  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1767  ConstTensor inputToOutputWeights(
1768  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1769  inputToOutputWeightsData);
1770 
1771  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1772  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1773  ConstTensor recurrentToForgetWeights(
1774  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1775  recurrentToForgetWeightsData);
1776 
1777  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1778  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1779  ConstTensor recurrentToCellWeights(
1780  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1781  recurrentToCellWeightsData);
1782 
1783  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1784  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1785  ConstTensor recurrentToOutputWeights(
1786  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1787  recurrentToOutputWeightsData);
1788 
1789  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1790  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1791  ConstTensor forgetGateBias(
1792  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1793  forgetGateBiasData);
1794 
1795  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1796  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1797  ConstTensor cellBias(
1798  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1799  cellBiasData);
1800 
1801  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1802  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1803  ConstTensor outputGateBias(
1804  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1805  outputGateBiasData);
1806 
1807  // CIFG disabled params
1808  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1809  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
1810  ConstTensor inputToInputWeights(
1811  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1812  inputToInputWeightsData);
1813 
1814  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1815  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
1816  ConstTensor recurrentToInputWeights(
1817  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1818  recurrentToInputWeightsData);
1819 
1820  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1821  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
1822  ConstTensor inputGateBias(
1823  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1824  inputGateBiasData);
1825 
1826  // Peephole enabled, CIFG disabled params
1827  std::vector<int16_t> cellToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1828  std::vector<unsigned int> cellToInputWeightsDimensions = {1, 1, 3, 3};
1829  ConstTensor cellToInputWeights(
1830  TensorInfo(4, cellToInputWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1831  cellToInputWeightsData);
1832 
1833  std::vector<int16_t> cellToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1834  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1835  ConstTensor cellToForgetWeights(
1836  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1837  cellToForgetWeightsData);
1838 
1839  std::vector<int16_t> cellToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1840  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1841  ConstTensor cellToOutputWeights(
1842  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1843  cellToOutputWeightsData);
1844 
1845  LstmInputParams params;
1846 
1847  // Basic params
1848  params.m_InputToForgetWeights = &inputToForgetWeights;
1849  params.m_InputToCellWeights = &inputToCellWeights;
1850  params.m_InputToOutputWeights = &inputToOutputWeights;
1851  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1852  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1853  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1854  params.m_ForgetGateBias = &forgetGateBias;
1855  params.m_CellBias = &cellBias;
1856  params.m_OutputGateBias = &outputGateBias;
1857 
1858  // CIFG disabled params
1859  params.m_InputToInputWeights = &inputToInputWeights;
1860  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
1861  params.m_InputGateBias = &inputGateBias;
1862 
1863  // Peephole enabled, CIFG disabled params
1864  params.m_CellToInputWeights = &cellToInputWeights;
1865  params.m_CellToForgetWeights = &cellToForgetWeights;
1866  params.m_CellToOutputWeights = &cellToOutputWeights;
1867 
1868  TestQLstmLayerVisitor visitor(descriptor, params);
1869 
1870  NetworkImpl net;
1871 
1872  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
1873  layer->ExecuteStrategy(visitor);
1874 }
1875 
1876 TEST_CASE("CheckQLstmLayerCifgEnabledPeepholeEnabled")
1877 {
1878  QLstmDescriptor descriptor;
1879  descriptor.m_ProjectionClip = 0.5f;
1880  descriptor.m_CellClip = 0.3f;
1881  descriptor.m_CifgEnabled = true;
1882  descriptor.m_PeepholeEnabled = true;
1883 
1884  // Basic params
1885  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1886  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1887  ConstTensor inputToForgetWeights(
1888  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1889  inputToForgetWeightsData);
1890 
1891  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1892  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1893  ConstTensor inputToCellWeights(
1894  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1895  inputToCellWeightsData);
1896 
1897  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1898  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
1899  ConstTensor inputToOutputWeights(
1900  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1901  inputToOutputWeightsData);
1902 
1903  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1904  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
1905  ConstTensor recurrentToForgetWeights(
1906  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1907  recurrentToForgetWeightsData);
1908 
1909  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1910  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
1911  ConstTensor recurrentToCellWeights(
1912  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1913  recurrentToCellWeightsData);
1914 
1915  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1916  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
1917  ConstTensor recurrentToOutputWeights(
1918  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1919  recurrentToOutputWeightsData);
1920 
1921  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1922  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
1923  ConstTensor forgetGateBias(
1924  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1925  forgetGateBiasData);
1926 
1927  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1928  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
1929  ConstTensor cellBias(
1930  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1931  cellBiasData);
1932 
1933  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1934  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
1935  ConstTensor outputGateBias(
1936  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
1937  outputGateBiasData);
1938 
1939  // Peephole enabled and CIFG enabled params
1940  std::vector<int16_t> cellToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1941  std::vector<unsigned int> cellToForgetWeightsDimensions = {1, 1, 3, 3};
1942  ConstTensor cellToForgetWeights(
1943  TensorInfo(4, cellToForgetWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1944  cellToForgetWeightsData);
1945 
1946  std::vector<int16_t> cellToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1947  std::vector<unsigned int> cellToOutputWeightsDimensions = {1, 1, 3, 3};
1948  ConstTensor cellToOutputWeights(
1949  TensorInfo(4, cellToOutputWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
1950  cellToOutputWeightsData);
1951 
1952  LstmInputParams params;
1953 
1954  // Basic params
1955  params.m_InputToForgetWeights = &inputToForgetWeights;
1956  params.m_InputToCellWeights = &inputToCellWeights;
1957  params.m_InputToOutputWeights = &inputToOutputWeights;
1958  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
1959  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
1960  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
1961  params.m_ForgetGateBias = &forgetGateBias;
1962  params.m_CellBias = &cellBias;
1963  params.m_OutputGateBias = &outputGateBias;
1964 
1965  // Peephole enabled and CIFG enabled params
1966  params.m_CellToForgetWeights = &cellToForgetWeights;
1967  params.m_CellToOutputWeights = &cellToOutputWeights;
1968 
1969  TestQLstmLayerVisitor visitor(descriptor, params);
1970 
1971  NetworkImpl net;
1972 
1973  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
1974  layer->ExecuteStrategy(visitor);
1975 }
1976 
1977 TEST_CASE("CheckQLstmLayerProjectionEnabled")
1978 {
1979  QLstmDescriptor descriptor;
1980  descriptor.m_ProjectionClip = 0.5f;
1981  descriptor.m_CellClip = 0.3f;
1982  descriptor.m_CifgEnabled = true;
1983  descriptor.m_ProjectionEnabled = true;
1984 
1985  // Basic params ONLY
1986  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1987  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
1988  ConstTensor inputToForgetWeights(
1989  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1990  inputToForgetWeightsData);
1991 
1992  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1993  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
1994  ConstTensor inputToCellWeights(
1995  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
1996  inputToCellWeightsData);
1997 
1998  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
1999  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
2000  ConstTensor inputToOutputWeights(
2001  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2002  inputToOutputWeightsData);
2003 
2004  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2005  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
2006  ConstTensor recurrentToForgetWeights(
2007  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2008  recurrentToForgetWeightsData);
2009 
2010  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2011  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
2012  ConstTensor recurrentToCellWeights(
2013  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2014  recurrentToCellWeightsData);
2015 
2016  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2017  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
2018  ConstTensor recurrentToOutputWeights(
2019  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2020  recurrentToOutputWeightsData);
2021 
2022  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2023  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
2024  ConstTensor forgetGateBias(
2025  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2026  forgetGateBiasData);
2027 
2028  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2029  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
2030  ConstTensor cellBias(
2031  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2032  cellBiasData);
2033 
2034  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2035  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
2036  ConstTensor outputGateBias(
2037  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2038  outputGateBiasData);
2039 
2040  // Projection enabled params
2041  std::vector<uint8_t> projectionWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2042  std::vector<unsigned int> projectionWeightsDimensions = {1, 1, 3, 3};
2043  ConstTensor projectionWeights(
2044  TensorInfo(4, projectionWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2045  projectionWeightsData);
2046 
2047  std::vector<int32_t> projectionBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2048  std::vector<unsigned int> projectionBiasDimensions = {1, 1, 3, 3};
2049  ConstTensor projectionBias(
2050  TensorInfo(4, projectionBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2051  projectionBiasData);
2052 
2053  LstmInputParams params;
2054 
2055  // Basic params
2056  params.m_InputToForgetWeights = &inputToForgetWeights;
2057  params.m_InputToCellWeights = &inputToCellWeights;
2058  params.m_InputToOutputWeights = &inputToOutputWeights;
2059  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
2060  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
2061  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
2062  params.m_ForgetGateBias = &forgetGateBias;
2063  params.m_CellBias = &cellBias;
2064  params.m_OutputGateBias = &outputGateBias;
2065 
2066  // Projection enabled params
2067  params.m_ProjectionWeights = &projectionWeights;
2068  params.m_ProjectionBias = &projectionBias;
2069 
2070  TestQLstmLayerVisitor visitor(descriptor, params);
2071 
2072  NetworkImpl net;
2073 
2074  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
2075  layer->ExecuteStrategy(visitor);
2076 }
2077 
2078 TEST_CASE("CheckQLstmLayerCifgDisabledLayerNormEnabled")
2079 {
2080  QLstmDescriptor descriptor;
2081  descriptor.m_ProjectionClip = 0.5f;
2082  descriptor.m_CellClip = 0.3f;
2083  descriptor.m_CifgEnabled = false;
2084  descriptor.m_LayerNormEnabled = true;
2085 
2086  // Basic params
2087  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2088  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
2089  ConstTensor inputToForgetWeights(
2090  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2091  inputToForgetWeightsData);
2092 
2093  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2094  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
2095  ConstTensor inputToCellWeights(
2096  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2097  inputToCellWeightsData);
2098 
2099  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2100  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
2101  ConstTensor inputToOutputWeights(
2102  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2103  inputToOutputWeightsData);
2104 
2105  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2106  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
2107  ConstTensor recurrentToForgetWeights(
2108  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2109  recurrentToForgetWeightsData);
2110 
2111  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2112  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
2113  ConstTensor recurrentToCellWeights(
2114  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2115  recurrentToCellWeightsData);
2116 
2117  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2118  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
2119  ConstTensor recurrentToOutputWeights(
2120  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2121  recurrentToOutputWeightsData);
2122 
2123  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2124  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
2125  ConstTensor forgetGateBias(
2126  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2127  forgetGateBiasData);
2128 
2129  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2130  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
2131  ConstTensor cellBias(
2132  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2133  cellBiasData);
2134 
2135  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2136  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
2137  ConstTensor outputGateBias(
2138  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2139  outputGateBiasData);
2140 
2141  // CIFG disabled params
2142  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2143  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
2144  ConstTensor inputToInputWeights(
2145  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2146  inputToInputWeightsData);
2147 
2148  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2149  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
2150  ConstTensor recurrentToInputWeights(
2151  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2152  recurrentToInputWeightsData);
2153 
2154  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2155  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
2156  ConstTensor inputGateBias(
2157  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2158  inputGateBiasData);
2159 
2160  // Layer Norm enabled, CIFG disabled params
2161  std::vector<int16_t> inputLayerNormWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2162  std::vector<unsigned int> inputLayerNormWeightsDimensions = {1, 1, 3, 3};
2163  ConstTensor inputLayerNormWeights(
2164  TensorInfo(4, inputLayerNormWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
2165  inputLayerNormWeightsData);
2166 
2167  std::vector<int16_t> forgetLayerNormWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2168  std::vector<unsigned int> forgetLayerNormWeightsDimensions = {1, 1, 3, 3};
2169  ConstTensor forgetLayerNormWeights(
2170  TensorInfo(4, forgetLayerNormWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
2171  forgetLayerNormWeightsData);
2172 
2173  std::vector<int16_t> cellLayerNormWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2174  std::vector<unsigned int> cellLayerNormWeightsDimensions = {1, 1, 3, 3};
2175  ConstTensor cellLayerNormWeights(
2176  TensorInfo(4, cellLayerNormWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
2177  cellLayerNormWeightsData);
2178 
2179  std::vector<int16_t> outputLayerNormWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2180  std::vector<unsigned int> outputLayerNormWeightsDimensions = {1, 1, 3, 3};
2181  ConstTensor outputLayerNormWeights(
2182  TensorInfo(4, outputLayerNormWeightsDimensions.data(), DataType::QSymmS16, 0.0f, 0, true),
2183  outputLayerNormWeightsData);
2184 
2185  LstmInputParams params;
2186 
2187  // Basic params
2188  params.m_InputToForgetWeights = &inputToForgetWeights;
2189  params.m_InputToCellWeights = &inputToCellWeights;
2190  params.m_InputToOutputWeights = &inputToOutputWeights;
2191  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
2192  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
2193  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
2194  params.m_ForgetGateBias = &forgetGateBias;
2195  params.m_CellBias = &cellBias;
2196  params.m_OutputGateBias = &outputGateBias;
2197 
2198  // CIFG disabled params
2199  params.m_InputToInputWeights = &inputToInputWeights;
2200  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
2201  params.m_InputGateBias = &inputGateBias;
2202 
2203  // Layer Norm enabled, CIFG disabled params
2204  params.m_InputLayerNormWeights = &inputLayerNormWeights;
2205  params.m_ForgetLayerNormWeights = &forgetLayerNormWeights;
2206  params.m_CellLayerNormWeights = &cellLayerNormWeights;
2207  params.m_OutputLayerNormWeights = &outputLayerNormWeights;
2208 
2209  TestQLstmLayerVisitor visitor(descriptor, params);
2210 
2211  NetworkImpl net;
2212 
2213  IConnectableLayer* const layer = net.AddQLstmLayer(descriptor, params);
2214  layer->ExecuteStrategy(visitor);
2215 }
2216 
2217 
2218 TEST_CASE("CheckQuantizedLstmLayer")
2219 {
2220  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2221  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
2222  ConstTensor inputToInputWeights(
2223  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2224  inputToInputWeightsData);
2225 
2226  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2227  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
2228  ConstTensor inputToForgetWeights(
2229  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2230  inputToForgetWeightsData);
2231 
2232  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2233  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
2234  ConstTensor inputToCellWeights(
2235  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2236  inputToCellWeightsData);
2237 
2238  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2239  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
2240  ConstTensor inputToOutputWeights(
2241  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2242  inputToOutputWeightsData);
2243 
2244 
2245  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2246  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
2247  ConstTensor recurrentToInputWeights(
2248  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2249  recurrentToInputWeightsData);
2250 
2251  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2252  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
2253  ConstTensor recurrentToForgetWeights(
2254  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2255  recurrentToForgetWeightsData);
2256 
2257  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2258  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
2259  ConstTensor recurrentToCellWeights(
2260  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2261  recurrentToCellWeightsData);
2262 
2263  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2264  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
2265  ConstTensor recurrentToOutputWeights(
2266  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QSymmS8, 0.0f, 0, true),
2267  recurrentToOutputWeightsData);
2268 
2269 
2270  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2271  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
2272  ConstTensor inputGateBias(
2273  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2274  inputGateBiasData);
2275 
2276  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2277  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
2278  ConstTensor forgetGateBias(
2279  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2280  forgetGateBiasData);
2281 
2282  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2283  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
2284  ConstTensor cellBias(
2285  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2286  cellBiasData);
2287 
2288  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2289  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
2290  ConstTensor outputGateBias(
2291  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2292  outputGateBiasData);
2293 
2294  QuantizedLstmInputParams params;
2295 
2296  params.m_InputToInputWeights = &inputToInputWeights;
2297  params.m_InputToForgetWeights = &inputToForgetWeights;
2298  params.m_InputToCellWeights = &inputToCellWeights;
2299  params.m_InputToOutputWeights = &inputToOutputWeights;
2300 
2301  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
2302  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
2303  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
2304  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
2305 
2306  params.m_InputGateBias = &inputGateBias;
2307  params.m_ForgetGateBias = &forgetGateBias;
2308  params.m_CellBias = &cellBias;
2309  params.m_OutputGateBias = &outputGateBias;
2310 
2311  TestQuantizedLstmLayerVisitor visitor(params);
2312 
2313  NetworkImpl net;
2314 
2315  IConnectableLayer* const layer = net.AddQuantizedLstmLayer(params);
2316  layer->ExecuteStrategy(visitor);
2317 }
2318 
2319 TEST_CASE("CheckNamedQuantizedLstmLayer")
2320 {
2321  const char* layerName = "LstmLayer";
2322  std::vector<uint8_t> inputToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2323  std::vector<unsigned int> inputToInputWeightsDimensions = {1, 1, 3, 3};
2324  ConstTensor inputToInputWeights(
2325  TensorInfo(4, inputToInputWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2326  inputToInputWeightsData);
2327 
2328  std::vector<uint8_t> inputToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2329  std::vector<unsigned int> inputToForgetWeightsDimensions = {1, 1, 3, 3};
2330  ConstTensor inputToForgetWeights(
2331  TensorInfo(4, inputToForgetWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2332  inputToForgetWeightsData);
2333 
2334  std::vector<uint8_t> inputToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2335  std::vector<unsigned int> inputToCellWeightsDimensions = {1, 1, 3, 3};
2336  ConstTensor inputToCellWeights(
2337  TensorInfo(4, inputToCellWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2338  inputToCellWeightsData);
2339 
2340  std::vector<uint8_t> inputToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2341  std::vector<unsigned int> inputToOutputWeightsDimensions = {1, 1, 3, 3};
2342  ConstTensor inputToOutputWeights(
2343  TensorInfo(4, inputToOutputWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2344  inputToOutputWeightsData);
2345 
2346 
2347  std::vector<uint8_t> recurrentToInputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2348  std::vector<unsigned int> recurrentToInputWeightsDimensions = {1, 1, 3, 3};
2349  ConstTensor recurrentToInputWeights(
2350  TensorInfo(4, recurrentToInputWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2351  recurrentToInputWeightsData);
2352 
2353  std::vector<uint8_t> recurrentToForgetWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2354  std::vector<unsigned int> recurrentToForgetWeightsDimensions = {1, 1, 3, 3};
2355  ConstTensor recurrentToForgetWeights(
2356  TensorInfo(4, recurrentToForgetWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2357  recurrentToForgetWeightsData);
2358 
2359  std::vector<uint8_t> recurrentToCellWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2360  std::vector<unsigned int> recurrentToCellWeightsDimensions = {1, 1, 3, 3};
2361  ConstTensor recurrentToCellWeights(
2362  TensorInfo(4, recurrentToCellWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2363  recurrentToCellWeightsData);
2364 
2365  std::vector<uint8_t> recurrentToOutputWeightsData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2366  std::vector<unsigned int> recurrentToOutputWeightsDimensions = {1, 1, 3, 3};
2367  ConstTensor recurrentToOutputWeights(
2368  TensorInfo(4, recurrentToOutputWeightsDimensions.data(), DataType::QAsymmU8, 0.0f, 0, true),
2369  recurrentToOutputWeightsData);
2370 
2371 
2372  std::vector<int32_t> inputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2373  std::vector<unsigned int> inputGateBiasDimensions = {1, 1, 3, 3};
2374  ConstTensor inputGateBias(
2375  TensorInfo(4, inputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2376  inputGateBiasData);
2377 
2378  std::vector<int32_t> forgetGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2379  std::vector<unsigned int> forgetGateBiasDimensions = {1, 1, 3, 3};
2380  ConstTensor forgetGateBias(
2381  TensorInfo(4, forgetGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2382  forgetGateBiasData);
2383 
2384  std::vector<int32_t> cellBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2385  std::vector<unsigned int> cellBiasDimensions = {1, 1, 3, 3};
2386  ConstTensor cellBias(
2387  TensorInfo(4, cellBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2388  cellBiasData);
2389 
2390  std::vector<int32_t> outputGateBiasData = {1, 2, 3, 4, 5, 6, 7, 8, 9};
2391  std::vector<unsigned int> outputGateBiasDimensions = {1, 1, 3, 3};
2392  ConstTensor outputGateBias(
2393  TensorInfo(4, outputGateBiasDimensions.data(), DataType::Signed32, 0.0f, 0, true),
2394  outputGateBiasData);
2395 
2396  QuantizedLstmInputParams params;
2397 
2398  params.m_InputToInputWeights = &inputToInputWeights;
2399  params.m_InputToForgetWeights = &inputToForgetWeights;
2400  params.m_InputToCellWeights = &inputToCellWeights;
2401  params.m_InputToOutputWeights = &inputToOutputWeights;
2402 
2403  params.m_RecurrentToInputWeights = &recurrentToInputWeights;
2404  params.m_RecurrentToForgetWeights = &recurrentToForgetWeights;
2405  params.m_RecurrentToCellWeights = &recurrentToCellWeights;
2406  params.m_RecurrentToOutputWeights = &recurrentToOutputWeights;
2407 
2408  params.m_InputGateBias = &inputGateBias;
2409  params.m_ForgetGateBias = &forgetGateBias;
2410  params.m_CellBias = &cellBias;
2411  params.m_OutputGateBias = &outputGateBias;
2412 
2413  TestQuantizedLstmLayerVisitor visitor(params, layerName);
2414 
2415  NetworkImpl net;
2416 
2417  IConnectableLayer* const layer = net.AddQuantizedLstmLayer(params, layerName);
2418  layer->ExecuteStrategy(visitor);
2419 }
2420 
2421 }
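The test cases above cover each combination of QLstmDescriptor flags. As a quick reference, here is a hedged summary sketch (not ArmNN API documentation, simply the LstmInputParams groups that the tests above populate for each flag):

// Sketch: parameter groups set by the QLstm tests above for each QLstmDescriptor flag.
// Always set (basic params): m_InputToForgetWeights, m_InputToCellWeights, m_InputToOutputWeights,
//   m_RecurrentToForgetWeights, m_RecurrentToCellWeights, m_RecurrentToOutputWeights,
//   m_ForgetGateBias, m_CellBias, m_OutputGateBias.
// m_CifgEnabled == false also sets: m_InputToInputWeights, m_RecurrentToInputWeights, m_InputGateBias.
// m_PeepholeEnabled == true also sets: m_CellToForgetWeights, m_CellToOutputWeights
//   (plus m_CellToInputWeights when CIFG is disabled).
// m_ProjectionEnabled == true also sets: m_ProjectionWeights and m_ProjectionBias.
// m_LayerNormEnabled == true also sets: m_ForgetLayerNormWeights, m_CellLayerNormWeights,
//   m_OutputLayerNormWeights (plus m_InputLayerNormWeights when CIFG is disabled).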

◆ TopKSort()

void TopKSort ( unsigned int  k,
unsigned int *  indices,
const float *  values,
unsigned int  numElement 
)

Definition at line 24 of file DetectionPostProcess.cpp.

Referenced by DetectionPostProcess(), NonMaxSuppression(), and TEST_SUITE().

25 {
26  std::partial_sort(indices, indices + k, indices + numElement,
27  [&values](unsigned int i, unsigned int j) { return values[i] > values[j]; });
28 }
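A minimal usage sketch based on the signature above, assuming the header that declares armnn::TopKSort is on the include path (the include below is illustrative). It partially sorts an index array so that the first k entries refer to the largest values:

#include <iostream>
#include <numeric>
#include <vector>
#include "DetectionPostProcess.hpp" // illustrative include; declares armnn::TopKSort

int main()
{
    std::vector<float> scores = { 0.1f, 0.9f, 0.4f, 0.7f };      // hypothetical scores
    std::vector<unsigned int> indices(scores.size());
    std::iota(indices.begin(), indices.end(), 0u);               // 0, 1, 2, 3

    const unsigned int k = 2u;
    armnn::TopKSort(k, indices.data(), scores.data(), static_cast<unsigned int>(indices.size()));

    // indices[0..k) now reference the largest scores (index 1 then index 3 for this data).
    for (unsigned int i = 0u; i < k; ++i)
    {
        std::cout << indices[i] << " -> " << scores[indices[i]] << "\n";
    }
    return 0;
}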

◆ TransposeConvolution2dImpl()

void TransposeConvolution2dImpl ( const TransposeConvolution2dDescriptor &  descriptor,
const TensorShape &  inputShape,
Decoder< float > &  inputDecoder,
const TensorShape &  outputShape,
Encoder< float > &  outputEncoder,
const TensorShape &  weightsShape,
Decoder< float > &  weightsDecoder,
Decoder< float > *  biasesDecoder 
)

Definition at line 15 of file TransposeConvolution2d.cpp.

References Decoder< IType >::DecodeTensor(), Decoder< IType >::Get(), DataLayoutIndexed::GetChannelsIndex(), DataLayoutIndexed::GetHeightIndex(), DataLayoutIndexed::GetIndex(), TensorShape::GetNumElements(), DataLayoutIndexed::GetWidthIndex(), TransposeConvolution2dDescriptor::m_BiasEnabled, TransposeConvolution2dDescriptor::m_DataLayout, TransposeConvolution2dDescriptor::m_PadLeft, TransposeConvolution2dDescriptor::m_PadTop, TransposeConvolution2dDescriptor::m_StrideX, TransposeConvolution2dDescriptor::m_StrideY, NHWC, and Encoder< IType >::Set().

Referenced by RefTransposeConvolution2dWorkload::ExecuteAsync().

23 {
24  if (descriptor.m_BiasEnabled && !biasesDecoder)
25  {
26  throw InvalidArgumentException("Biases enabled but no bias data provided");
27  }
28  const DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
29  const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
30  const unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
31  const unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
32 
33  const unsigned int numBatches = inputShape[0];
34 
35  const unsigned int inputWidth = inputShape[widthIndex];
36  const unsigned int inputHeight = inputShape[heightIndex];
37  const unsigned int inputDepth = inputShape[channelsIndex];
38 
39  const unsigned int weightsHeight = weightsShape[heightIndex];
40  const unsigned int weightsWidth = weightsShape[widthIndex];
41  const unsigned int weightsDepth = weightsShape[channelsIndex];
42 
43  const unsigned int outputHeight = outputShape[heightIndex];
44  const unsigned int outputWidth = outputShape[widthIndex];
45  const unsigned int outputDepth = outputShape[channelsIndex];
46 
47  const unsigned int paddingLeft = descriptor.m_PadLeft;
48  const unsigned int paddingTop = descriptor.m_PadTop;
49 
50  const unsigned int strideX = descriptor.m_StrideX;
51  const unsigned int strideY = descriptor.m_StrideY;
52 
53  std::vector<float> outputBuffer(outputShape.GetNumElements(), 0);
54 
55  const std::vector<float> inputVec = inputDecoder.DecodeTensor(inputShape);
56  const std::vector<float> filterVec = weightsDecoder.DecodeTensor(weightsShape);
57 
58  for (unsigned int batch = 0u; batch < numBatches; ++batch)
59  {
60  for (unsigned int yInput = 0u; yInput < inputHeight; ++yInput)
61  {
62  for (unsigned int xInput = 0u; xInput < inputWidth; ++xInput)
63  {
64  unsigned int xOutputOrigin = xInput * strideX - paddingLeft;
65  unsigned int yOutputOrigin = yInput * strideY - paddingTop;
66 
67  for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
68  {
69  for (unsigned int yWeights = 0u; yWeights < weightsHeight; ++yWeights)
70  {
71  for (unsigned int xWeights = 0u; xWeights < weightsWidth; ++xWeights)
72  {
73  unsigned int yOutput = yOutputOrigin + yWeights;
74  unsigned int xOutput = xOutputOrigin + xWeights;
75 
76  if (yOutput < outputHeight && xOutput< outputWidth)
77  {
78  for (unsigned int dInput = 0u; dInput < inputDepth; dInput++)
79  {
80  unsigned int inputIndex;
81  unsigned int outputIndex;
82  unsigned int weightsIndex;
83 
84  if(descriptor.m_DataLayout == armnn::DataLayout::NHWC)
85  {
86  inputIndex = batch * inputHeight * inputWidth * inputDepth +
87  yInput * inputWidth * inputDepth +
88  xInput * inputDepth +
89  dInput;
90 
91  weightsIndex = dOutput * weightsHeight * weightsWidth * weightsDepth +
92  yWeights * weightsWidth * weightsDepth +
93  xWeights * weightsDepth +
94  dInput;
95 
96  outputIndex = batch * outputHeight * outputWidth * outputDepth +
97  yOutput * outputWidth * outputDepth +
98  xOutput * outputDepth +
99  dOutput;
100  }
101  else
102  {
103  inputIndex = batch * inputDepth * inputHeight * inputWidth +
104  dInput * inputHeight * inputWidth +
105  yInput * inputWidth +
106  xInput;
107 
108  weightsIndex = dOutput * weightsDepth * weightsHeight * weightsWidth +
109  dInput * weightsHeight * weightsWidth +
110  yWeights * weightsWidth +
111  xWeights;
112 
113  outputIndex = batch * outputDepth * outputHeight * outputWidth +
114  dOutput * outputHeight * outputWidth +
115  yOutput * outputWidth +
116  xOutput;
117  }
118 
119  outputBuffer[outputIndex] += inputVec[inputIndex] * filterVec[weightsIndex];
120  }
121  }
122  }
123  }
124 
125  }
126  }
127  }
128  }
129 
130  // Apply bias (if enabled)
131  if (descriptor.m_BiasEnabled)
132  {
133  outputEncoder[0];
134  Decoder<float>& rBiasesDecoder = *biasesDecoder;
135 
136  for (unsigned int batch = 0u; batch < numBatches; ++batch)
137  {
138  for (unsigned int dOutput = 0u; dOutput < outputDepth; ++dOutput)
139  {
140  rBiasesDecoder[dOutput];
141  for (unsigned int yOutput = 0u; yOutput < outputHeight; ++yOutput)
142  {
143  for (unsigned int xOutput = 0u; xOutput < outputWidth; ++xOutput)
144  {
145  const unsigned int outputIndex =
146  dataLayoutIndexed.GetIndex(outputShape, batch, dOutput, yOutput, xOutput);
147  outputBuffer[outputIndex] += rBiasesDecoder.Get();
148  }
149  }
150  }
151  }
152  }
153  outputEncoder[0];
154  for (float output : outputBuffer)
155  {
156  outputEncoder.Set(output);
157  ++outputEncoder;
158  }
159 }
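To make the index mapping easier to follow, here is a minimal single-batch, single-channel sketch of the same scatter-style computation using plain float buffers instead of Decoder/Encoder. It uses signed coordinates rather than the unsigned wrap-around in the reference code above, and all sizes, strides and padding values are hypothetical:

#include <vector>

// Each input element is multiplied by the whole kernel and accumulated into the
// output window it maps to (the "scatter" view of transposed convolution).
std::vector<float> TransposeConv2dSketch(const std::vector<float>& input, unsigned int inH, unsigned int inW,
                                         const std::vector<float>& kernel, unsigned int kH, unsigned int kW,
                                         unsigned int outH, unsigned int outW,
                                         unsigned int strideY, unsigned int strideX,
                                         unsigned int padTop, unsigned int padLeft)
{
    std::vector<float> output(outH * outW, 0.0f);
    for (unsigned int yIn = 0; yIn < inH; ++yIn)
    {
        for (unsigned int xIn = 0; xIn < inW; ++xIn)
        {
            // Same origin computation as the reference implementation above.
            const int yOrigin = static_cast<int>(yIn * strideY) - static_cast<int>(padTop);
            const int xOrigin = static_cast<int>(xIn * strideX) - static_cast<int>(padLeft);
            for (unsigned int yK = 0; yK < kH; ++yK)
            {
                for (unsigned int xK = 0; xK < kW; ++xK)
                {
                    const int yOut = yOrigin + static_cast<int>(yK);
                    const int xOut = xOrigin + static_cast<int>(xK);
                    if (yOut >= 0 && xOut >= 0 &&
                        yOut < static_cast<int>(outH) && xOut < static_cast<int>(outW))
                    {
                        output[static_cast<unsigned int>(yOut) * outW + static_cast<unsigned int>(xOut)] +=
                            input[yIn * inW + xIn] * kernel[yK * kW + xK];
                    }
                }
            }
        }
    }
    return output;
}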

◆ TrueFunc()

bool armnn::TrueFunc ( Optional< std::string &>  reasonIfUnsupported,
Params &&...  params 
)

Definition at line 54 of file LayerSupportCommon.hpp.

References IgnoreUnused().

55 {
56  IgnoreUnused(reasonIfUnsupported);
57  IgnoreUnused(params...);
58  return true;
59 }

◆ VerifyClContextBuffer()

bool armnn::VerifyClContextBuffer ( flatbuffers::Verifier &  verifier)
inline

Definition at line 157 of file ClContextSchema_generated.h.

References ClContextIdentifier().

158  {
159  return verifier.VerifyBuffer<armnn::ClContext>(ClContextIdentifier());
160 }

◆ VerifySizePrefixedClContextBuffer()

bool armnn::VerifySizePrefixedClContextBuffer ( flatbuffers::Verifier &  verifier)
inline

Definition at line 162 of file ClContextSchema_generated.h.

References ClContextIdentifier().

163  {
164  return verifier.VerifySizePrefixedBuffer<armnn::ClContext>(ClContextIdentifier());
165 }
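A minimal sketch showing how these verifiers might be used before deserializing, assuming the buffer already holds a serialized ClContext and that ClContextSchema_generated.h is on the include path (the include path below is illustrative):

#include <cstdint>
#include <vector>
#include <flatbuffers/flatbuffers.h>
#include "ClContextSchema_generated.h" // illustrative include path

// Returns true if 'data' verifies as a ClContext flatbuffer (not size-prefixed).
bool IsValidClContext(const std::vector<uint8_t>& data)
{
    flatbuffers::Verifier verifier(data.data(), data.size());
    return armnn::VerifyClContextBuffer(verifier);
}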

◆ VerifyTensorInfoDataType()

void armnn::VerifyTensorInfoDataType ( const armnn::TensorInfo &  info,
armnn::DataType  dataType 
)
inline

Definition at line 337 of file TypesUtils.hpp.

References TensorInfo::GetDataType(), GetDataTypeName(), and TensorInfo::GetShape().

Referenced by ParserFlatbuffersFixture::CheckTensors(), ParserFlatbuffersSerializeFixture::RunTest(), and ParserFlatbuffersFixture::RunTest().

338 {
339  if (info.GetDataType() != dataType)
340  {
341  std::stringstream ss;
342  ss << "Unexpected datatype:" << armnn::GetDataTypeName(info.GetDataType())
343  << " for tensor:" << info.GetShape()
344  << ". The type expected to be: " << armnn::GetDataTypeName(dataType);
345  throw armnn::Exception(ss.str());
346  }
347 }
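A short usage sketch; the expected data type and the example shape below are arbitrary:

#include <armnn/Tensor.hpp>
#include <armnn/TypesUtils.hpp>

void CheckIsFloat32(const armnn::TensorInfo& info)
{
    // Throws armnn::Exception with a descriptive message if the type does not match.
    armnn::VerifyTensorInfoDataType(info, armnn::DataType::Float32);
}

// e.g. CheckIsFloat32(armnn::TensorInfo({1, 2, 2, 3}, armnn::DataType::Float32)); // passes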

◆ WrapClError()

RuntimeException armnn::WrapClError ( const cl::Error &  clError,
const CheckLocation &  location 
)
inline

Definition at line 147 of file ClWorkloadUtils.hpp.

References Exception::what().

Referenced by ClWorkloadFactory::AfterWorkloadsCreated(), and RunClFunction().

148 {
149  std::stringstream message;
150  message << "CL error: " << clError.what() << ". Error code: " << clError.err();
151 
152  return RuntimeException(message.str(), location);
153 }
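A hedged sketch of the wrapping pattern used by RunClFunction (referenced above), assuming the CHECK_LOCATION() macro from armnn/Exceptions.hpp and an illustrative include for the header that declares WrapClError:

#include <armnn/Exceptions.hpp>
#include "ClWorkloadUtils.hpp" // illustrative include; declares armnn::WrapClError

template <typename Func>
void RunAndWrap(Func&& clCall)
{
    try
    {
        clCall();
    }
    catch (cl::Error& error)
    {
        // Convert the OpenCL exception into an armnn::RuntimeException carrying the call site.
        throw armnn::WrapClError(error, CHECK_LOCATION());
    }
}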

Variable Documentation

◆ cpuAccCapabilities

const BackendCapabilities cpuAccCapabilities("CpuAcc", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })

◆ cpuRefCapabilities

const BackendCapabilities cpuRefCapabilities("CpuRef", { {"NonConstWeights", true}, {"AsyncExecution", true}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", true}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })

◆ EXPIRE_RATE

constexpr unsigned int EXPIRE_RATE = 3U

Variable to control the expire rate of the priority queue.

Definition at line 37 of file Types.hpp.

Referenced by Threadpool::TerminateThreadPool().

◆ g_AggregateProfilingEventsByInference

constexpr bool g_AggregateProfilingEventsByInference = true

Definition at line 37 of file Profiling.cpp.

◆ g_ProfilingEventCountHint

constexpr std::size_t g_ProfilingEventCountHint = 1024

Definition at line 29 of file Profiling.cpp.

◆ g_WriteProfilingEventSequence

constexpr bool g_WriteProfilingEventSequence = true

Definition at line 32 of file Profiling.cpp.

◆ g_WriteReportToStdOutOnProfilerDestruction

constexpr bool g_WriteReportToStdOutOnProfilerDestruction = false

Definition at line 41 of file Profiling.cpp.

◆ gpuAccCapabilities

const BackendCapabilities gpuAccCapabilities("GpuAcc", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", true}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", true}, {"MultiAxisPacking", false}, {"SingleAxisPacking", true} })

◆ LOWEST_CAPTURE_PERIOD

constexpr unsigned int LOWEST_CAPTURE_PERIOD = 10000u

The lowest performance data capture interval we support is 10 milliseconds.

Definition at line 34 of file Types.hpp.

Referenced by TEST_SUITE().

◆ MaxNumOfTensorDimensions

◆ oldCpuRefCapabilities

const std::set<armnn::BackendCapability> oldCpuRefCapabilities
Initial value:
{
 armnn::BackendCapability::NonConstWeights
}

Definition at line 24 of file RefBackend.hpp.

◆ paddingRequiredLayers

const std::set<armnn::LayerType> paddingRequiredLayers
Initial value:
{
LayerType::Convolution2d,
LayerType::DepthwiseConvolution2d,
LayerType::Lstm,
LayerType::Mean,
LayerType::QuantizedLstm,
LayerType::TransposeConvolution2d
}

Definition at line 16 of file NeonTensorHandleFactory.hpp.

Referenced by NeonTensorHandleFactory::GetCapabilities().

◆ tl_Profiler

thread_local IProfiler* tl_Profiler = nullptr

Definition at line 570 of file Profiling.cpp.

Referenced by ProfilerManager::GetProfiler().

◆ wordSize

constexpr size_t wordSize = sizeof(size_t) * 8