From 3f22d27f51c493e37b9da0692b6bf776f4430dcf Mon Sep 17 00:00:00 2001
From: Nikhil Raj
Date: Fri, 5 Nov 2021 12:26:41 +0000
Subject: IVGCVSW-6372 Change order in doxygen tree view

Signed-off-by: Nikhil Raj
Change-Id: Ia765d335ef998e7e47a1c0c81a375645972f4e1d
---
 docs/01_00_quick_start.dox             |   25 +
 docs/01_00_software_tools.dox          |   46 -
 docs/01_01_parsers.dox                 |  206 --
 docs/01_02_deserializer_serializer.dox |  184 --
 docs/01_03_delegate.dox                |  178 --
 docs/02_build_guides.dox               |   35 -
 docs/02_operator_list.dox              | 3333 ++++++++++++++++++++++++++++++++
 docs/03_build_guides.dox               |   35 +
 docs/03_use_guides.dox                 |   21 -
 docs/04_contributor.dox                |   39 -
 docs/04_use_guides.dox                 |   21 +
 docs/05_contributor.dox                |   39 +
 docs/05_operator_list.dox              | 3333 --------------------------------
 docs/06_00_software_tools.dox          |   46 +
 docs/06_01_parsers.dox                 |  206 ++
 docs/06_02_deserializer_serializer.dox |  184 ++
 docs/06_03_delegate.dox                |  178 ++
 docs/Doxyfile                          |   20 +-
 18 files changed, 4078 insertions(+), 4051 deletions(-)
 create mode 100644 docs/01_00_quick_start.dox
 delete mode 100644 docs/01_00_software_tools.dox
 delete mode 100644 docs/01_01_parsers.dox
 delete mode 100644 docs/01_02_deserializer_serializer.dox
 delete mode 100644 docs/01_03_delegate.dox
 delete mode 100644 docs/02_build_guides.dox
 create mode 100644 docs/02_operator_list.dox
 create mode 100644 docs/03_build_guides.dox
 delete mode 100644 docs/03_use_guides.dox
 delete mode 100644 docs/04_contributor.dox
 create mode 100644 docs/04_use_guides.dox
 create mode 100644 docs/05_contributor.dox
 delete mode 100644 docs/05_operator_list.dox
 create mode 100644 docs/06_00_software_tools.dox
 create mode 100644 docs/06_01_parsers.dox
 create mode 100644 docs/06_02_deserializer_serializer.dox
 create mode 100644 docs/06_03_delegate.dox

diff --git a/docs/01_00_quick_start.dox b/docs/01_00_quick_start.dox
new file mode 100644
index 0000000000..a2a1efa50d
--- /dev/null
+++ b/docs/01_00_quick_start.dox
@@ -0,0 +1,25 @@
+/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved.
+///
+/// SPDX-License-Identifier: MIT
+///
+
+namespace armnn
+{
+/**
+@page quickstart Quick Start Guide
+
+On this page you can find guides which allow you to get set up and ready to run models with Arm NN quickly.
+These guides rely on using the apt packages or prebuilt binaries from our release notes. So far we have only
+added a quick start guide that uses the TfLite delegate; more guides will be added in the future.
+
+ - @subpage md_delegate_DelegateQuickStartGuide
+**/
+}
+
+namespace armnn
+{
+/**
+
+@page md_delegate_DelegateQuickStartGuide TfLite Delegate Quick Start Guide
+**/
+}
diff --git a/docs/01_00_software_tools.dox b/docs/01_00_software_tools.dox
deleted file mode 100644
index e560f44882..0000000000
--- a/docs/01_00_software_tools.dox
+++ /dev/null
@@ -1,46 +0,0 @@
-/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved.
-///
-/// SPDX-License-Identifier: MIT
-///
-
-namespace armnn
-{
-/**
-@page swtools Software Tools
-
-On this page you can find all software tools contained in the Arm NN repository. You will find links to how-to guides and
-other helpful information in each section.
- - - @subpage delegate - - @subpage parsers - - @subpage md_python_pyarmnn_README - - @subpage serializer - - @subpage deserializer - - @subpage md_src_armnnConverter_README - - @subpage md_tests_ImageCSVFileGenerator_README - - @subpage md_tests_ImageTensorGenerator_README - - @subpage md_tests_ModelAccuracyTool-Armnn_README -**/ -} - - -/// Create pages for each tool so they appear nicely in the doxygen tree-view. Subpages are not listed there. -/// -/// Note: The parser, serializer and deserializer pages are created in 01_parsers.dox or 02_deserializer_serializer.dox -namespace armnn -{ -/** - -@page md_python_pyarmnn_README PyArmNN - -@page md_src_armnnConverter_README Converter - -@page md_tests_ModelAccuracyTool-Armnn_README ModelAccuracyTool - -@page md_tests_ImageCSVFileGenerator_README ImageCSVFileGenerator - -@page md_tests_ImageTensorGenerator_README ImageTensorGenerator - -**/ -} - diff --git a/docs/01_01_parsers.dox b/docs/01_01_parsers.dox deleted file mode 100644 index 186ed6193a..0000000000 --- a/docs/01_01_parsers.dox +++ /dev/null @@ -1,206 +0,0 @@ -/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. -/// -/// SPDX-License-Identifier: MIT -/// - -namespace armnn -{ -/** -@page parsers Parsers - -@tableofcontents -Execute models from different machine learning platforms efficiently with our parsers. Simply choose a parser according -to the model you want to run e.g. If you've got a model in onnx format (.onnx) use our onnx-parser. - -If you would like to run a Tensorflow Lite (TfLite) model you probably also want to take a look at our @ref delegate. - -All parsers are written in C++ but it is also possible to use them in python. For more information on our python -bindings take a look into the @ref md_python_pyarmnn_README section. - -
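
[Editor's note] To make the parser workflow concrete, below is a minimal sketch in C++: parse the model, optimize it for a set of backends, load it into the runtime and run one inference. It is a sketch based on Arm NN's public C++ API around this release; the model path, tensor layout and backend choices are placeholder assumptions, and exact headers or signatures may differ between Arm NN versions.

```cpp
// Minimal sketch: run a TensorFlow Lite model with armnnTfLiteParser.
// Assumptions: "model.tflite" is a hypothetical path; the model has one
// FLOAT32 input and one FLOAT32 output; adjust for your own model.
#include <armnn/ArmNN.hpp>
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#include <string>
#include <vector>

int main()
{
    // Parse the model file into an armnn::INetwork.
    auto parser = armnnTfLiteParser::ITfLiteParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile("model.tflite");

    // Look up input/output binding info (binding id + TensorInfo) by name.
    std::string inputName  = parser->GetSubgraphInputTensorNames(0)[0];
    std::string outputName = parser->GetSubgraphOutputTensorNames(0)[0];
    auto inputBinding  = parser->GetNetworkInputBindingInfo(0, inputName);
    auto outputBinding = parser->GetNetworkOutputBindingInfo(0, outputName);

    // Optimize for the preferred backends and load into the runtime.
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);
    armnn::IOptimizedNetworkPtr optNet =
        armnn::Optimize(*network, {armnn::Compute::CpuAcc, armnn::Compute::CpuRef},
                        runtime->GetDeviceSpec());
    armnn::NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Bind input/output buffers and execute one inference.
    std::vector<float> inputData(inputBinding.second.GetNumElements());
    std::vector<float> outputData(outputBinding.second.GetNumElements());
    armnn::InputTensors  inputs  {{inputBinding.first,
                                   armnn::ConstTensor(inputBinding.second, inputData.data())}};
    armnn::OutputTensors outputs {{outputBinding.first,
                                   armnn::Tensor(outputBinding.second, outputData.data())}};
    runtime->EnqueueWorkload(netId, inputs, outputs);
    return 0;
}
```

The same optimize/load/run steps apply regardless of which parser produced the `INetwork`; only the parsing and binding-lookup calls differ per format.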

- - - - -@section S5_onnx_parser Arm NN Onnx Parser - -`armnnOnnxParser` is a library for loading neural networks defined in ONNX protobuf files into the Arm NN runtime. - -## ONNX operators that the Arm NN SDK supports - -This reference guide provides a list of ONNX operators the Arm NN SDK currently supports. - -The Arm NN SDK ONNX parser currently only supports fp32 operators. - -### Fully supported - -- Add - - See the ONNX [Add documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Add) for more information - -- AveragePool - - See the ONNX [AveragePool documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#AveragePool) for more information. - -- Concat - - See the ONNX [Concat documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Concat) for more information. - -- Constant - - See the ONNX [Constant documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Constant) for more information. - -- Clip - - See the ONNX [Clip documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Clip) for more information. - -- Flatten - - See the ONNX [Flatten documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Flatten) for more information. - -- Gather - - See the ONNX [Gather documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Gather) for more information. - -- GlobalAveragePool - - See the ONNX [GlobalAveragePool documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#GlobalAveragePool) for more information. - -- LeakyRelu - - See the ONNX [LeakyRelu documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#LeakyRelu) for more information. - -- MaxPool - - See the ONNX [max_pool documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#MaxPool) for more information. - -- Relu - - See the ONNX [Relu documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Relu) for more information. - -- Reshape - - See the ONNX [Reshape documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Reshape) for more information. - -- Shape - - See the ONNX [Shape documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Shape) for more information. - -- Sigmoid - - See the ONNX [Sigmoid documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Sigmoid) for more information. - -- Tanh - - See the ONNX [Tanh documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Tanh) for more information. - -- Unsqueeze - - See the ONNX [Unsqueeze documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Unsqueeze) for more information. - -### Partially supported - -- Conv - - The parser only supports 2D convolutions with a group = 1 or group = #Nb_of_channel (depthwise convolution) -- BatchNormalization - - The parser does not support training mode. See the ONNX [BatchNormalization documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#BatchNormalization) for more information. -- Gemm - - The parser only supports constant bias or non-constant bias where bias dimension = 1. See the ONNX [Gemm documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Gemm) for more information. -- MatMul - - The parser only supports constant weights in a fully connected layer. - -## Tested networks - -Arm tested these operators with the following ONNX fp32 neural networks: -- Mobilenet_v2. 
See the ONNX [MobileNet documentation](https://github.com/onnx/models/tree/master/vision/classification/mobilenet) for more information. -- Simple MNIST. This is no longer directly documented by ONNX. The model and test data may be downloaded [from the ONNX model zoo](https://onnxzoo.blob.core.windows.net/models/opset_8/mnist/mnist.tar.gz). - -More machine learning operators will be supported in future releases. -
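
[Editor's note] As a short illustration of the ONNX path, the sketch below shows only the parts that differ from the TfLite flow shown earlier: network creation and binding lookup. The file and tensor names ("model.onnx", "input", "output") are hypothetical placeholders, and the signatures reflect the public `armnnOnnxParser` interface as I understand it.

```cpp
// Sketch: load an ONNX model with armnnOnnxParser (fp32 operators only).
#include <armnn/ArmNN.hpp>
#include <armnnOnnxParser/IOnnxParser.hpp>

armnn::INetworkPtr LoadOnnxModel(armnn::BindingPointInfo& inputBinding,
                                 armnn::BindingPointInfo& outputBinding)
{
    auto parser = armnnOnnxParser::IOnnxParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile("model.onnx");

    // ONNX bindings are looked up directly by tensor name (no subgraph index).
    inputBinding  = parser->GetNetworkInputBindingInfo("input");
    outputBinding = parser->GetNetworkOutputBindingInfo("output");

    // Optimization, loading and execution then proceed exactly as in the
    // TfLite parser sketch above.
    return network;
}
```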



- - - - -@section S6_tf_lite_parser Arm NN Tf Lite Parser - -`armnnTfLiteParser` is a library for loading neural networks defined by TensorFlow Lite FlatBuffers files -into the Arm NN runtime. - -## TensorFlow Lite operators that the Arm NN SDK supports - -This reference guide provides a list of TensorFlow Lite operators the Arm NN SDK currently supports. - -### Fully supported -The Arm NN SDK TensorFlow Lite parser currently supports the following operators: - -- ABS -- ADD -- ARG_MAX -- ARG_MIN -- AVERAGE_POOL_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE -- BATCH_TO_SPACE -- CONCATENATION, Supported Fused Activation: RELU , RELU6 , TANH, NONE -- CONV_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE -- CONV_3D, Supported Fused Activation: RELU , RELU6 , TANH, NONE -- DEPTH_TO_SPACE -- DEPTHWISE_CONV_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE -- DEQUANTIZE -- DIV -- ELU -- EQUAL -- EXP -- EXPAND_DIMS -- FULLY_CONNECTED, Supported Fused Activation: RELU , RELU6 , TANH, NONE -- GATHER -- GREATER -- GREATER_EQUAL -- HARD_SWISH -- LEAKY_RELU -- LESS -- LESS_EQUAL -- LOGICAL_NOT -- LOGISTIC -- L2_NORMALIZATION -- MAX_POOL_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE -- MAXIMUM -- MEAN -- MINIMUM -- MIRROR_PAD -- MUL -- NEG -- NOT_EQUAL -- PACK -- PAD -- PRELU -- QUANTIZE -- RELU -- RELU6 -- REDUCE_MAX -- REDUCE_MIN -- RESHAPE -- RESIZE_BILINEAR -- RESIZE_NEAREST_NEIGHBOR -- RSQRT -- SHAPE -- SLICE -- SOFTMAX -- SPACE_TO_BATCH -- SPLIT -- SPLIT_V -- SQUEEZE -- STRIDED_SLICE -- SUB -- SUM -- TANH -- TRANSPOSE -- TRANSPOSE_CONV -- UNPACK - -### Custom Operator -- TFLite_Detection_PostProcess - -## Tested networks -Arm tested these operators with the following TensorFlow Lite neural network: -- [Quantized MobileNet](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224_quant.tgz) -- [Quantized SSD MobileNet](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18.tar.gz) -- DeepSpeech v1 converted from [TensorFlow model](https://github.com/mozilla/DeepSpeech/releases/tag/v0.4.1) -- DeepSpeaker -- [DeepLab v3+](https://www.tensorflow.org/lite/models/segmentation/overview) -- FSRCNN -- EfficientNet-lite -- RDN converted from [TensorFlow model](https://github.com/hengchuan/RDN-TensorFlow) -- Quantized RDN (CpuRef) -- [Quantized Inception v3](http://download.tensorflow.org/models/tflite_11_05_08/inception_v3_quant.tgz) -- [Quantized Inception v4](http://download.tensorflow.org/models/inception_v4_299_quant_20181026.tgz) (CpuRef) -- Quantized ResNet v2 50 (CpuRef) -- Quantized Yolo v3 (CpuRef) - -More machine learning operators will be supported in future releases. - -**/ -} - diff --git a/docs/01_02_deserializer_serializer.dox b/docs/01_02_deserializer_serializer.dox deleted file mode 100644 index 5d4dc43a74..0000000000 --- a/docs/01_02_deserializer_serializer.dox +++ /dev/null @@ -1,184 +0,0 @@ -/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. -/// -/// SPDX-License-Identifier: MIT -/// - -namespace armnn -{ -/** -@page serializer Serializer -@tableofcontents - -The `armnnSerializer` is a library for serializing an Arm NN network to a stream. - -@section serializersupport Supported Layers - -This reference guide provides a list of layers which can be serialized by the Arm NN SDK. 
- -@subsection serializersupportflully Fully supported - -The Arm NN SDK Serializer currently supports the following layers: - -- Activation -- Addition -- ArgMinMax -- BatchToSpaceNd -- BatchNormalization -- Cast -- ChannelShuffle -- Comparison -- Concat -- Constant -- Convolution2d -- Convolution3d -- DepthToSpace -- DepthwiseConvolution2d -- Dequantize -- DetectionPostProcess -- Division -- ElementwiseUnary -- Fill -- Floor -- FullyConnected -- Gather -- Input -- InstanceNormalization -- L2Normalization -- Logical -- LogSoftmax -- Lstm -- Maximum -- Mean -- Merge -- Minimum -- Multiplication -- Normalization -- Output -- Pad (Constant, Symmetric, Reflect) -- Permute -- Pooling2d -- Prelu -- QLstm -- Quantize -- QuantizedLstm -- Rank -- Reduce -- Reshape -- Resize -- Shape -- Slice -- Softmax -- SpaceToBatchNd -- SpaceToDepth -- Splitter -- Stack -- StandIn -- StridedSlice -- Subtraction -- Switch -- Transpose -- TransposeConvolution2d -- UnidirectionalSequenceLstm - -More machine learning layers will be supported in future releases. - -@subsection serializersupportdeprecated Deprecated layers - -Some layers have been deprecated and replaced by others layers. In order to maintain backward compatibility, serializations of these deprecated layers will deserialize to the layers that have replaced them, as follows: - -- Abs will deserialize as ElementwiseUnary -- Equal will deserialize as Comparison -- Greater will deserialize as Comparison -- Merger will deserialize as Concat -- ResizeBilinear will deserialize as Resize -- Rsqrt will deserialize as ElementwiseUnary -
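
[Editor's note] To show the library in use, here is a minimal sketch of serializing an `armnn::INetwork` to a file, based on the public `ISerializer` interface; the output path "model.armnn" is a placeholder assumption.

```cpp
// Sketch: serialize an armnn::INetwork to a binary FlatBuffers file.
// "model.armnn" is a hypothetical output path.
#include <armnn/INetwork.hpp>
#include <armnnSerializer/ISerializer.hpp>
#include <fstream>

bool SaveNetwork(const armnn::INetwork& network)
{
    auto serializer = armnnSerializer::ISerializer::Create();
    serializer->Serialize(network);  // capture the network contents

    std::ofstream file("model.armnn", std::ios::binary);
    return serializer->SaveSerializedToStream(file);  // write the stream
}
```

A file written this way can later be loaded back with the `armnnDeserializer` described below.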



- -@page deserializer Deserializer -@tableofcontents - -The `armnnDeserializer` is a library for loading neural networks defined by Arm NN FlatBuffers files -into the Arm NN runtime. - -@section deserializersupport Supported Layers - -This reference guide provides a list of layers which can be deserialized by the Arm NN SDK. - -@subsection deserializersupportfully Fully supported - -The Arm NN SDK Deserialize parser currently supports the following layers: - -- Abs -- Activation -- Addition -- ArgMinMax -- BatchToSpaceNd -- BatchNormalization -- Cast -- ChannelShuffle -- Concat -- Comparison -- Constant -- Convolution2d -- DepthToSpace -- DepthwiseConvolution2d -- Dequantize -- DetectionPostProcess -- Division -- ElementwiseUnary -- Fill -- Floor -- FullyConnected -- Gather -- Input -- InstanceNormalization -- L2Normalization -- Logical -- LogSoftmax -- Lstm -- Maximum -- Mean -- Merge -- Minimum -- Multiplication -- Normalization -- Output -- Pad -- Permute -- Pooling2d -- Prelu -- Quantize -- QLstm -- QuantizedLstm -- Rank -- Reduce -- Reshape -- Resize -- ResizeBilinear -- Slice -- Softmax -- SpaceToBatchNd -- SpaceToDepth -- Splitter -- Stack -- StandIn -- StridedSlice -- Subtraction -- Switch -- Transpose -- TransposeConvolution2d -- UnidirectionalSequenceLstm - -More machine learning layers will be supported in future releases. - -@subsection deserializersupportdeprecated Deprecated layers - -Some layers have been deprecated and replaced by others layers. In order to maintain backward compatibility, serializations of these deprecated layers will deserialize to the layers that have replaced them, as follows: - -- Equal will deserialize as Comparison -- Merger will deserialize as Concat -- Greater will deserialize as Comparison -- ResizeBilinear will deserialize as Resize - -**/ -} \ No newline at end of file diff --git a/docs/01_03_delegate.dox b/docs/01_03_delegate.dox deleted file mode 100644 index b3caf8cbf8..0000000000 --- a/docs/01_03_delegate.dox +++ /dev/null @@ -1,178 +0,0 @@ -/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. -/// -/// SPDX-License-Identifier: MIT -/// - -namespace armnn -{ -/** -@page delegate TfLite Delegate -@tableofcontents - - -@section delegateintro About the delegate -'armnnDelegate' is a library for accelerating certain TensorFlow Lite (TfLite) operators on Arm hardware. It can be -integrated in TfLite using its delegation mechanism. TfLite will then delegate the execution of operators supported by -Arm NN to Arm NN. - -The main difference to our @ref S6_tf_lite_parser is the amount of operators you can run with it. If none of the active -backends support an operation in your model you won't be able to execute it with our parser. In contrast to that, TfLite -only delegates operations to the armnnDelegate if it does support them and otherwise executes them itself. In other -words, every TfLite model can be executed and every operation in your model that we can accelerate will be accelerated. -That is the reason why the armnnDelegate is our recommended way to accelerate TfLite models. - -If you need help building the armnnDelegate, please take a look at our [build guide](delegate/BuildGuideNative.md). -An example how to setup TfLite to integrate the armnnDelegate can be found in this -guide: [Integrate the delegate into python](delegate/IntegrateDelegateIntoPython.md) - - -@section delegatesupport Supported Operators -This reference guide provides a list of TensorFlow Lite operators the Arm NN SDK currently supports. 
- -@subsection delegatefullysupported Fully supported - -The Arm NN SDK TensorFlow Lite delegate currently supports the following operators: - -- ABS - -- ADD - -- ARGMAX - -- ARGMIN - -- AVERAGE_POOL_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE - -- BATCH_TO_SPACE_ND - -- CAST - -- CONCATENATION, Supported Fused Activation: RELU , RELU6 , TANH, NONE - -- CONV_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE - -- CONV_3D, Supported Fused Activation: RELU , RELU6 , TANH, NONE - -- DEPTH_TO_SPACE - -- DEPTHWISE_CONV_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE - -- DEQUANTIZE - -- DIV - -- EQUAL - -- ELU - -- EXP - -- FULLY_CONNECTED, Supported Fused Activation: RELU , RELU6 , TANH, NONE - -- FLOOR - -- GATHER - -- GREATER - -- GREATER_OR_EQUAL - -- HARD_SWISH - -- LESS - -- LESS_OR_EQUAL - -- LOCAL_RESPONSE_NORMALIZATION - -- LOGICAL_AND - -- LOGICAL_NOT - -- LOGICAL_OR - -- LOGISTIC - -- LOG_SOFTMAX - -- LSTM - -- L2_NORMALIZATION - -- L2_POOL_2D - -- MAXIMUM - -- MAX_POOL_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE - -- MEAN - -- MINIMUM - -- MIRROR_PAD - -- MUL - -- NEG - -- NOT_EQUAL - -- PACK - -- PAD - -- PRELU - -- QUANTIZE - -- RANK - -- REDUCE_MAX - -- REDUCE_MIN - -- RESHAPE - -- RESIZE_BILINEAR - -- RESIZE_NEAREST_NEIGHBOR - -- RELU - -- RELU6 - -- RSQRT - -- SHAPE - -- SOFTMAX - -- SPACE_TO_BATCH_ND - -- SPACE_TO_DEPTH - -- SPLIT - -- SPLIT_V - -- SQRT - -- STRIDED_SLICE - -- SUB - -- SUM - -- TANH - -- TRANSPOSE - -- TRANSPOSE_CONV - -- UNIDIRECTIONAL_SEQUENCE_LSTM - -- UNPACK - -More machine learning operators will be supported in future releases. -**/ -} \ No newline at end of file diff --git a/docs/02_build_guides.dox b/docs/02_build_guides.dox deleted file mode 100644 index e61354dd54..0000000000 --- a/docs/02_build_guides.dox +++ /dev/null @@ -1,35 +0,0 @@ -/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. -/// -/// SPDX-License-Identifier: MIT -/// - -namespace armnn -{ -/** -@page buildguides Installation and build Guides -@tableofcontents - -This page links all guides to build and/or install Arm NN tools hosted in our repository. -You can find additional tutorials on -https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides - -Arm NN is written using portable C++14 and the build system uses [CMake](https://cmake.org/), therefore it is possible -to build for a wide variety of target platforms, from a wide variety of host environments. - -Arm NN is managed by a single CMake project. That allows you to build multiple tools at once by combining the CMake -options from different build guides. - -## Installation via APT Repository - - @subpage md_InstallationViaAptRepository - -## TfLite Delegate build guide - - @subpage md_delegate_BuildGuideNative - -## Android NDK and Arm NN - - @subpage md_BuildGuideAndroidNDK - -## Crosscompile Guide for Arm NN - - @subpage md_BuildGuideCrossCompilation - -**/ -} \ No newline at end of file diff --git a/docs/02_operator_list.dox b/docs/02_operator_list.dox new file mode 100644 index 0000000000..90aee130bf --- /dev/null +++ b/docs/02_operator_list.dox @@ -0,0 +1,3333 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** +@page operator_list Arm NN Operators + +@tableofcontents + +@section S5_1_operator_list Arm NN Operators + +Arm NN supports operators that are listed in below table. + +Arm NN supports a wide list of data-types. 
+The main data-types that the Machine Learning functions support are the following: +
    +
  • BFLOAT16: 16-bit non-standard brain floating point +
  • QASYMMU8: 8-bit unsigned asymmetric quantized +
  • QASYMMS8: 8-bit signed asymmetric quantized +
  • QUANTIZEDSYMM8PERAXIS: 8-bit signed symmetric quantized +
  • QSYMMS8: 8-bit signed symmetric quantized +
  • QSYMMS16: 16-bit signed symmetric quantized +
  • FLOAT32: 32-bit single precision floating point +
  • FLOAT16: 16-bit half precision floating point +
  • SIGNED32: 32-bit signed integer +
  • BOOLEAN: 8-bit unsigned char +
  • All: Agnostic to any specific data type +
+ +Arm NN supports the following data layouts (fast changing dimension from right to left): +
    +
  • NHWC: Layout where channels are in the fastest changing dimension +
  • NCHW: Layout where width is in the fastest changing dimension +
  • All: Agnostic to any specific data layout +
+where N = batches, C = channels, H = height, W = width + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Operator + Description + Equivalent Android NNAPI Operator + Backends + Data Layouts + Data Types +
AbsLayer + Layer to perform absolute operation. + +
    +
  • ANEURALNETWORKS_ABS +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
ActivationLayer + Layer to apply the specified activation function. + +
    +
  • ANEURALNETWORKS_ABS +
  • ANEURALNETWORKS_ELU +
  • ANEURALNETWORKS_HARD_SWISH +
  • ANEURALNETWORKS_LOGISTIC +
  • ANEURALNETWORKS_PRELU +
  • ANEURALNETWORKS_RELU +
  • ANEURALNETWORKS_RELU1 +
  • ANEURALNETWORKS_RELU6 +
  • ANEURALNETWORKS_SQRT +
  • ANEURALNETWORKS_TANH +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
FLOAT16 +
FLOAT32 +
+
AdditionLayer + Layer to add 2 tensors. + +
    +
  • ANEURALNETWORKS_ADD +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
ArgMinMaxLayer + Layer to calculate the index of the minimum or maximum values in a tensor + based on an axis. + +
    +
  • ANEURALNETWORKS_ARGMAX +
  • ANEURALNETWORKS_ARGMIN +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
SIGNED64 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
BatchNormalizationLayer + Layer to perform batch normalization. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT32 +
FLOAT16 +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT32 +
FLOAT16 +
+
BatchToSpaceNdLayer + Layer to perform a batch to space transformation. + +
    +
  • ANEURALNETWORKS_BATCH_TO_SPACE_ND +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
CastLayer + Layer to cast a tensor to a type. + +
    +
  • ANEURALNETWORKS_CAST +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QSYMMS8 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
FLOAT16 +
SIGNED32 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
ChannelShuffleLayer + Layer to reorganize the channels of a tensor. + +
    +
  • ANEURALNETWORKS_CHANNEL_SHUFFLE +
+
CpuRef + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
QSYMMS8 +
QASYMMS8 +
QASYMMU8 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
FLOAT16 +
FLOAT32 +
+
ComparisonLayer + Layer to compare 2 tensors. + +
    +
  • ANEURALNETWORKS_EQUAL +
  • ANEURALNETWORKS_GREATER +
  • ANEURALNETWORKS_GREATER_EQUAL +
  • ANEURALNETWORKS_LESS +
  • ANEURALNETWORKS_LESS_EQUAL +
  • ANEURALNETWORKS_NOT_EQUAL +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
BOOLEAN +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
ConcatLayer + Layer to concatenate tensors along a given axis. + +
    +
  • ANEURALNETWORKS_CONCATENATION +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
ConstantLayer + Layer to provide a constant tensor. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
ConvertBf16ToFp32Layer + Layer to convert BFloat16 tensor to Float32 tensor. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT32 +
+
ConvertFp16ToFp32Layer + Layer to convert Float16 tensor to Float32 tensor. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
ConvertFp32ToBf16Layer + Layer to convert Float32 tensor to BFloat16 tensor. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT32 +
+
ConvertFp32ToFp16Layer + Layer to convert Float32 tensor to Float16 tensor. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
Convolution2dLayer + Layer to compute a convolution operation. + +
    +
  • ANEURALNETWORKS_CONV_2D +
  • ANEURALNETWORKS_GROUPED_CONV_2D +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
SIGNED32 +
FLOAT16 +
FLOAT32 +
QASYMMU8 +
QASYMMS8 +
QUANTIZEDSYMM8PERAXIS +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
SIGNED32 +
FLOAT16 +
FLOAT32 +
QASYMMU8 +
QASYMMS8 +
QUANTIZEDSYMM8PERAXIS +
+
Convolution3dLayer + Layer to compute a 3D convolution operation. + +
    +
  • N/A +
+
CpuRef + +
    +
  • NDHWC +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • N/A +
+
+
    +
  • N/A +
+
GpuAcc + +
    +
  • N/A +
+
+
    +
  • N/A +
+
DebugLayer + Layer to print out inter layer tensor information. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS8 +
QSYMMS16 +
SIGNED32 +
+
DepthToSpaceLayer + Layer to perform Depth to Space transformation. + +
    +
  • ANEURALNETWORKS_DEPTH_TO_SPACE +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
DepthwiseConvolution2dLayer + Layer to compute a 2D depthwise convolution operation. + +
    +
  • ANEURALNETWORKS_DEPTHWISE_CONV_2D +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT16 +
FLOAT32 +
SIGNED32 +
QASYMMU8 +
QASYMMS8 +
QUANTIZEDSYMM8PERAXIS +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT16 +
FLOAT32 +
SIGNED32 +
QASYMMU8 +
QASYMMS8 +
QUANTIZEDSYMM8PERAXIS +
+
DequantizeLayer + Layer to dequantize the values in a tensor. + +
    +
  • ANEURALNETWORKS_DEQUANTIZE +
+
CpuRef + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
QSYMMS8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
QASYMMU8 +
QASYMMS8 +
QUANTIZEDSYMM8PERAXIS +
QSYMMS8 +
QSYMMS16 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
QASYMMU8 +
QASYMMS8 +
QUANTIZEDSYMM8PERAXIS +
QSYMMS8 +
QSYMMS16 +
+
DetectionPostProcessLayer + Layer to generate the detection output from center-size encoded boxes, class predictions and anchors by applying non-maximum suppression (NMS). + +
    +
  • ANEURALNETWORKS_DETECTION_POSTPROCESSING +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT32 +
+
DivisionLayer + Layer to divide 2 tensors. + +
    +
  • ANEURALNETWORKS_DIV +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
ElementwiseBaseLayer + Layer to perform Add - Div - Max - Min - Mul operations. + +
    +
  • ANEURALNETWORKS_ADD +
  • ANEURALNETWORKS_DIV +
  • ANEURALNETWORKS_MAXIMUM +
  • ANEURALNETWORKS_MINIMUM +
  • ANEURALNETWORKS_MUL +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
ElementwiseUnaryLayer + Layer to perform Rsqrt - Exp - Neg - Log - Abs - Sin - Sqrt operations. + +
    +
  • ANEURALNETWORKS_ABS +
  • ANEURALNETWORKS_EXP +
  • ANEURALNETWORKS_LOG +
  • ANEURALNETWORKS_NEG +
  • ANEURALNETWORKS_RSQRT +
  • ANEURALNETWORKS_SIN +
  • ANEURALNETWORKS_SQRT +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
FakeQuantizationLayer + Layer to quantize float values and dequantize afterwards. The current implementation does not dequantize the values. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
FLOAT32 +
+
FillLayer + Layer to set the values of a tensor with a given value. + +
    +
  • ANEURALNETWORKS_FILL +
+
CpuRef + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
FloorLayer + Layer to round each value down to the nearest whole number. + +
    +
  • ANEURALNETWORKS_FLOOR +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT32 +
FLOAT16 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT32 +
FLOAT16 +
+
FullyConnectedLayer + Layer to perform a fully connected / dense operation. + +
    +
  • ANEURALNETWORKS_FULLY_CONNECTED +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
SIGNED32 +
FLOAT16 +
FLOAT32 +
QASYMMU8 +
QASYMMS8 +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
SIGNED32 +
FLOAT16 +
FLOAT32 +
QASYMMU8 +
QASYMMS8 +
+
GatherLayer + Layer to perform the gather operation along the chosen axis. + +
    +
  • ANEURALNETWORKS_GATHER +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
InputLayer + Special layer used to provide input data to the computational network. + +
    +
  • N/A +
+
All + +
    +
  • All +
+
+ +
+
All +
+
InstanceNormalizationLayer + Layer to perform an instance normalization on a given axis. + +
    +
  • ANEURALNETWORKS_INSTANCE_NORMALIZATION +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
L2NormalizationLayer + Layer to perform an L2 normalization on a given axis. + +
    +
  • ANEURALNETWORKS_L2_NORMALIZATION +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
LogSoftmaxLayer + Layer to perform the log softmax activations given logits. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
LogicalBinaryLayer + Layer to perform Logical AND - Logical NOT - Logical OR operations. + +
    +
  • ANEURALNETWORKS_LOGICAL_AND +
  • ANEURALNETWORKS_LOGICAL_NOT +
  • ANEURALNETWORKS_LOGICAL_OR +
+
CpuRef + +
    +
  • All +
+
+ +
+
BOOLEAN +
+
CpuAcc + +
    +
  • All +
+
+ +
+
BOOLEAN +
+
GpuAcc + +
    +
  • All +
+
+ +
+
BOOLEAN +
+
LstmLayer + Layer to perform a single time step in a Long Short-Term Memory (LSTM) operation. + +
    +
  • ANEURALNETWORKS_LSTM +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
MapLayer + Layer to perform a map operation on a tensor. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
All +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
MaximumLayer + Layer to perform an elementwise maximum of two tensors. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
MeanLayer + Layer to perform reduce mean operation. + +
    +
  • ANEURALNETWORKS_MEAN +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
MemCopyLayer + Layer to perform memory copy operation. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
BOOLEAN +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
MemImportLayer + Layer to perform memory import operation. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
All +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
MergeLayer + Layer to concatenate tensors along a given axis. + +
    +
  • ANEURALNETWORKS_CONCATENATION +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
MinimumLayer + Layer to perform an elementwise minimum of two tensors. + +
    +
  • ANEURALNETWORKS_MINIMUM +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
MultiplicationLayer + Layer to perform an elementwise multiplication of two tensors. + +
    +
  • ANEURALNETWORKS_MUL +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
NormalizationLayer + Layer to compute normalization operation. + +
    +
  • ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT32 +
FLOAT16 +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
FLOAT32 +
FLOAT16 +
+
OutputLayer + A special layer providing access to a user supplied buffer into which the output of a network can be written. + +
    +
  • N/A +
+
All + +
    +
  • All +
+
+ +
+
All +
+
PadLayer + Layer to pad a tensor. + +
    +
  • ANEURALNETWORKS_PAD +
  • ANEURALNETWORKS_PAD_V2 +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
PermuteLayer + Layer to transpose an ND tensor. + +
    +
  • ANEURALNETWORKS_TRANSPOSE +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
Pooling2dLayer + Layer to perform pooling with the specified pooling operation. + +
    +
  • ANEURALNETWORKS_AVERAGE_POOL_2D +
  • ANEURALNETWORKS_L2_POOL_2D +
  • ANEURALNETWORKS_MAX_POOL_2D +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
PreCompiledLayer + Opaque layer provided by a backend, containing an executable representation of a subgraph from the original network. + +
    +
  • N/A +
+
N/A + N/A + N/A +
PreluLayer + Layer to compute the activation layer with the PRELU activation function. + +
    +
  • ANEURALNETWORKS_PRELU +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
QLstmLayer + Layer to perform quantized LSTM (Long Short-Term Memory) operation. + +
    +
  • ANEURALNETWORKS_QUANTIZED_LSTM +
  • ANEURALNETWORKS_QUANTIZED_16BIT_LSTM +
+
CpuRef + +
    +
  • All +
+
+ +
+
All +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
SIGNED32 +
QSYMMS16 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMS8 +
QASYMMU8 +
SIGNED32 +
QSYMMS16 +
+
QuantizeLayer + Layer to perform quantization operation. + +
    +
  • ANEURALNETWORKS_QUANTIZE +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QASYMM16 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QASYMM16 +
FLOAT16 +
FLOAT32 +
+
QuantizedLstmLayer + Layer to perform quantized LSTM (Long Short-Term Memory) operation. + +
    +
  • ANEURALNETWORKS_QUANTIZED_LSTM +
  • ANEURALNETWORKS_QUANTIZED_16BIT_LSTM +
+
CpuRef + +
    +
  • All +
+
+ +
+
All +
+
CpuAcc + +
    +
  • All +
+
+ +
+
SIGNED32 +
QASYMMU8 +
QSYMMS16 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
SIGNED32 +
QASYMMU8 +
QSYMMS16 +
+
RankLayer + Layer to perform a rank operation. + +
    +
  • ANEURALNETWORKS_RANK +
+
CpuRef + +
    +
  • All +
+
+ +
+
All +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
ReduceLayer + Layer to perform reduce with the following operations - ARG_IDX_MAX: Index of the max value - ARG_IDX_MIN: Index of the min value - MEAN_SUM: Mean of sum - PROD: Product - SUM_SQUARE: Sum of squares - SUM: Sum - MIN: Min - MAX: Max + +
    +
  • ANEURALNETWORKS_REDUCE_MAX +
  • ANEURALNETWORKS_REDUCE_MIN +
  • ANEURALNETWORKS_REDUCE_SUM +
  • ANEURALNETWORKS_REDUCE_PROD +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
ReshapeLayer + Layer to reshape a tensor. + +
    +
  • ANEURALNETWORKS_RESHAPE +
  • ANEURALNETWORKS_SQUEEZE +
  • ANEURALNETWORKS_EXPAND_DIMS +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
BOOLEAN +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
ResizeLayer + Layer to perform resize of a tensor using one of the interpolation methods: - Bilinear - Nearest Neighbor. + +
    +
  • ANEURALNETWORKS_RESIZE_BILINEAR +
  • ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
RsqrtLayer + Layer to perform Rsqrt operation. + +
    +
  • ANEURALNETWORKS_RSQRT +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
SIGNED32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
FLOAT16 +
FLOAT32 +
+
ShapeLayer + Layer to return the shape of the input tensor. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
All +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
SliceLayer + Layer to perform tensor slicing. + +
    +
  • ANEURALNETWORKS_SLICE +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
SoftmaxLayer + Layer to perform softmax, log-softmax operation over the specified axis. + +
    +
  • ANEURALNETWORKS_LOG_SOFTMAX +
  • ANEURALNETWORKS_SOFTMAX +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
FLOAT16 +
FLOAT32 +
+
SpaceToBatchNdLayer + Layer to divide the spatial dimensions of a tensor into a grid of blocks and interleave these blocks with the batch dimension. + +
    +
  • ANEURALNETWORKS_SPACE_TO_BATCH_ND +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
SpaceToDepthLayer + Layer to rearrange blocks of spatial data into depth. + +
    +
  • ANEURALNETWORKS_SPACE_TO_DEPTH +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
SplitterLayer + Layer to split a tensor along a given axis. + +
    +
  • ANEURALNETWORKS_SPLIT +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
StackLayer + Layer to stack tensors along an axis. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
StandInLayer + A layer to represent "unknown" or "unsupported" operations in the input graph. It has a configurable number of input and output slots and an optional name. + +
    +
  • N/A +
+
N/A + N/A + N/A +
StridedSliceLayer + Layer to extract a strided slice of a tensor. + +
    +
  • ANEURALNETWORKS_STRIDED_SLICE +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
SubtractionLayer + Layer to perform an elementwise subtraction of 2 tensors. + +
    +
  • ANEURALNETWORKS_SUB +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
SIGNED32 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
GpuAcc + +
    +
  • All +
+
+ +
+
QASYMMU8 +
QASYMMS8 +
QSYMMS16 +
SIGNED32 +
FLOAT16 +
FLOAT32 +
+
TransposeConvolution2dLayer + Layer to perform a 2D transpose convolution (deconvolution) operation. + +
    +
  • ANEURALNETWORKS_TRANSPOSE_CONV_2D +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
SIGNED32 +
FLOAT16 +
FLOAT32 +
QASYMMU8 +
QASYMMS8 +
QUANTIZEDSYMM8PERAXIS +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
SIGNED32 +
FLOAT16 +
FLOAT32 +
QASYMMU8 +
QASYMMS8 +
QUANTIZEDSYMM8PERAXIS +
+
TransposeLayer + Layer to transpose a tensor. + +
    +
  • ANEURALNETWORKS_TRANSPOSE +
+
CpuRef + +
    +
  • All +
+
+ +
+
BFLOAT16 +
FLOAT16 +
FLOAT32 +
QASYMMS8 +
QASYMMU8 +
QSYMMS16 +
+
CpuAcc + +
    +
  • All +
+
+ +
+
All +
+
GpuAcc + +
    +
  • All +
+
+ +
+
All +
+
UnidirectionalSequenceLstmLayer + Layer to perform a unidirectional sequence LSTM operation. + +
    +
  • ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM +
+
CpuRef + +
    +
  • All +
+
+ +
Input Types +
FLOAT32 +
+ +
Weight Types +
FLOAT32 +
QASYMMS8 +
+
UnmapLayer + Layer to perform an unmap operation on a tensor. + +
    +
  • N/A +
+
CpuRef + +
    +
  • All +
+
+ +
+
All +
+
CpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
GpuAcc + +
    +
  • NHWC +
  • NCHW +
+
+ +
+
All +
+
+ +*/ +} // namespace \ No newline at end of file diff --git a/docs/03_build_guides.dox b/docs/03_build_guides.dox new file mode 100644 index 0000000000..f193c9f0bd --- /dev/null +++ b/docs/03_build_guides.dox @@ -0,0 +1,35 @@ +/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. +/// +/// SPDX-License-Identifier: MIT +/// + +namespace armnn +{ +/** +@page buildguides Build Guides +@tableofcontents + +This page links all guides to build and/or install Arm NN tools hosted in our repository. +You can find additional tutorials on +https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides + +Arm NN is written using portable C++14 and the build system uses [CMake](https://cmake.org/), therefore it is possible +to build for a wide variety of target platforms, from a wide variety of host environments. + +Arm NN is managed by a single CMake project. That allows you to build multiple tools at once by combining the CMake +options from different build guides. + +## Installation via APT Repository + - @subpage md_InstallationViaAptRepository + +## TfLite Delegate build guide + - @subpage md_delegate_BuildGuideNative + +## Android NDK and Arm NN + - @subpage md_BuildGuideAndroidNDK + +## Crosscompile Guide for Arm NN + - @subpage md_BuildGuideCrossCompilation + +**/ +} \ No newline at end of file diff --git a/docs/03_use_guides.dox b/docs/03_use_guides.dox deleted file mode 100644 index 1ecef4a215..0000000000 --- a/docs/03_use_guides.dox +++ /dev/null @@ -1,21 +0,0 @@ -/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. -/// -/// SPDX-License-Identifier: MIT -/// - -namespace armnn -{ -/** -@page useguides Integration Guides -@tableofcontents - -This page links all guides that explain how to use Arm NN tools and how to integrate them into your own project. -Some of these guides may not be hosted in our repository and will lead you to guides on -https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides - - -## TfLite delegate guides - - @subpage md_delegate_IntegrateDelegateIntoPython - -**/ -} \ No newline at end of file diff --git a/docs/04_contributor.dox b/docs/04_contributor.dox deleted file mode 100644 index 5cbb6c3b8c..0000000000 --- a/docs/04_contributor.dox +++ /dev/null @@ -1,39 +0,0 @@ -/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved. -/// -/// SPDX-License-Identifier: MIT -/// - -namespace armnn -{ -/** - -@page contribguides Contribution Guides -@tableofcontents - -This is a collection of guides that should help you contribute code to Arm NN. Before you get started, please -take a look into our /ref md_Contributor_Guide section. - - - @subpage md_src_backends_README \n - This guide explains how to add your own backend to Arm NN. This might be useful if you would like to accelerate neural - networks on hardware that Arm NN currently doesn't support. - - - - @subpage md_src_dynamic_README \n - Arm NN allows you to load a backend dynamically on runtime. To find out how that can be done take a look at this guide. - -**/ -} - - -/// Create pages for each tool so they appear nicely in the doxygen tree-view. Subpages are not listed there. -/// Also we can overwrite the page name this way. 
-namespace armnn
-{
-/**
-
-@page md_src_backends_README Backend Developer Guide
-
-@page md_src_dynamic_README Dynamically loadable Backend
-
-**/
-}
diff --git a/docs/04_use_guides.dox b/docs/04_use_guides.dox
new file mode 100644
index 0000000000..1ecef4a215
--- /dev/null
+++ b/docs/04_use_guides.dox
@@ -0,0 +1,21 @@
+/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved.
+///
+/// SPDX-License-Identifier: MIT
+///
+
+namespace armnn
+{
+/**
+@page useguides Integration Guides
+@tableofcontents
+
+This page links all guides that explain how to use Arm NN tools and how to integrate them into your own project.
+Some of these guides may not be hosted in our repository and will lead you to guides on
+https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides
+
+
+## TfLite delegate guides
+ - @subpage md_delegate_IntegrateDelegateIntoPython
+
+**/
+}
\ No newline at end of file
diff --git a/docs/05_contributor.dox b/docs/05_contributor.dox
new file mode 100644
index 0000000000..5cbb6c3b8c
--- /dev/null
+++ b/docs/05_contributor.dox
@@ -0,0 +1,39 @@
+/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved.
+///
+/// SPDX-License-Identifier: MIT
+///
+
+namespace armnn
+{
+/**
+
+@page contribguides Contribution Guides
+@tableofcontents
+
+This is a collection of guides that should help you contribute code to Arm NN. Before you get started, please
+take a look into our @ref md_Contributor_Guide section.
+
+ - @subpage md_src_backends_README \n
+ This guide explains how to add your own backend to Arm NN. This might be useful if you would like to accelerate neural
+ networks on hardware that Arm NN currently doesn't support.
+
+
+ - @subpage md_src_dynamic_README \n
+ Arm NN allows you to load a backend dynamically at runtime. To find out how that can be done, take a look at this guide.
+
+**/
+}
+
+
+/// Create pages for each tool so they appear nicely in the doxygen tree-view. Subpages are not listed there.
+/// Also we can overwrite the page name this way.
+namespace armnn
+{
+/**
+
+@page md_src_backends_README Backend Developer Guide
+
+@page md_src_dynamic_README Dynamically loadable Backend
+
+**/
+}
diff --git a/docs/05_operator_list.dox b/docs/05_operator_list.dox
deleted file mode 100644
index 90aee130bf..0000000000
--- a/docs/05_operator_list.dox
+++ /dev/null
@@ -1,3333 +0,0 @@
-/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved.
-///
-/// SPDX-License-Identifier: MIT
-///
-
-namespace armnn
-{
-/**
-@page operator_list Arm NN Operators
-
-@tableofcontents
-
-@section S5_1_operator_list Arm NN Operators
-
-Arm NN supports operators that are listed in below table.
-
-Arm NN supports a wide list of data-types.
-The main data-types that the Machine Learning functions support are the following:
-
    -
  • BFLOAT16: 16-bit non-standard brain floating point -
  • QASYMMU8: 8-bit unsigned asymmetric quantized -
  • QASYMMS8: 8-bit signed asymmetric quantized -
  • QUANTIZEDSYMM8PERAXIS: 8-bit signed symmetric quantized -
  • QSYMMS8: 8-bit unsigned symmetric quantized -
  • QSYMMS16: 16-bit unsigned symmetric quantized -
  • FLOAT32: 32-bit single precision floating point -
  • FLOAT16: 16-bit half precision floating point -
  • SIGNED32: 32-bit signed integer -
  • BOOLEAN: 8-bit unsigned char -
  • All: Agnostic to any specific data type -
- -Arm NN supports the following data layouts (fast changing dimension from right to left): -
    -
  • NHWC: Layout where channels are in the fastest changing dimension -
  • NCHW: Layout where width is in the fastest changing dimension -
  • All: Agnostic to any specific data layout -
-where N = batches, C = channels, H = height, W = width - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-In the table, Android NNAPI operator names are given without their ANEURALNETWORKS_ prefix, and backend
-support is summarised as "Backend (supported data layouts): supported data types".
-
-| Operator | Description | Android NNAPI equivalent | Backend support (data layouts): data types |
-|----------|-------------|--------------------------|--------------------------------------------|
-| AbsLayer | Computes the absolute value of a tensor. | ABS | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): FLOAT16, FLOAT32, SIGNED32; GpuAcc (All): FLOAT16, FLOAT32 |
-| ActivationLayer | Applies the specified activation function. | ABS, ELU, HARD_SWISH, LOGISTIC, PRELU, RELU, RELU1, RELU6, SQRT, TANH | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, FLOAT16, FLOAT32 |
-| AdditionLayer | Adds two tensors. | ADD | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32 |
-| ArgMinMaxLayer | Calculates the index of the minimum or maximum values in a tensor along an axis. | ARGMAX, ARGMIN | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32, SIGNED64; CpuAcc (All): QASYMMU8, QASYMMS8, SIGNED32, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, SIGNED32, FLOAT16, FLOAT32 |
-| BatchNormalizationLayer | Performs batch normalization. | N/A | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): FLOAT32, FLOAT16; GpuAcc (NHWC, NCHW): FLOAT32, FLOAT16 |
-| BatchToSpaceNdLayer | Performs a batch-to-space transformation. | BATCH_TO_SPACE_ND | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): All; GpuAcc (NHWC, NCHW): All |
-| CastLayer | Casts a tensor to a given type. | CAST | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QSYMMS8, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): QASYMMS8, QASYMMU8, FLOAT16, SIGNED32, FLOAT32; GpuAcc (All): QASYMMS8, QASYMMU8, SIGNED32, FLOAT16, FLOAT32 |
-| ChannelShuffleLayer | Reorganizes the channels of a tensor. | CHANNEL_SHUFFLE | CpuRef (All): FLOAT16, FLOAT32, QSYMMS8, QASYMMS8, QASYMMU8; CpuAcc (All): QASYMMS8, QASYMMU8, FLOAT16, FLOAT32; GpuAcc (All): QASYMMS8, QASYMMU8, FLOAT16, FLOAT32 |
-| ComparisonLayer | Compares two tensors. | EQUAL, GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, NOT_EQUAL | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, BOOLEAN, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): All; GpuAcc (All): All |
-| ConcatLayer | Concatenates tensors along a given axis. | CONCATENATION | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32 |
-| ConstantLayer | Provides a constant tensor. | N/A | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16, SIGNED32; CpuAcc (All): All; GpuAcc (All): All |
-| ConvertBf16ToFp32Layer | Converts a BFloat16 tensor to Float32. | N/A | CpuRef (All): BFLOAT16, FLOAT32; CpuAcc (All): BFLOAT16, FLOAT32; GpuAcc (All): BFLOAT16, FLOAT32 |
-| ConvertFp16ToFp32Layer | Converts a Float16 tensor to Float32. | N/A | CpuRef (All): FLOAT16, FLOAT32; CpuAcc (All): FLOAT16, FLOAT32; GpuAcc (All): FLOAT16, FLOAT32 |
-| ConvertFp32ToBf16Layer | Converts a Float32 tensor to BFloat16. | N/A | CpuRef (All): BFLOAT16, FLOAT32; CpuAcc (All): BFLOAT16, FLOAT32; GpuAcc (All): BFLOAT16, FLOAT32 |
-| ConvertFp32ToFp16Layer | Converts a Float32 tensor to Float16. | N/A | CpuRef (All): FLOAT16, FLOAT32; CpuAcc (All): FLOAT16, FLOAT32; GpuAcc (All): FLOAT16, FLOAT32 |
-| Convolution2dLayer | Computes a convolution operation. | CONV_2D, GROUPED_CONV_2D | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS; GpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS |
-| Convolution3dLayer | Computes a 3D convolution operation. | N/A | CpuRef (NDHWC): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16; CpuAcc: N/A; GpuAcc: N/A |
-| DebugLayer | Prints out inter-layer tensor information. | N/A | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16, SIGNED32 |
-| DepthToSpaceLayer | Performs a depth-to-space transformation. | DEPTH_TO_SPACE | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): All; GpuAcc (NHWC, NCHW): All |
-| DepthwiseConvolution2dLayer | Computes a depthwise convolution operation. | DEPTHWISE_CONV_2D | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16; CpuAcc (NHWC, NCHW): FLOAT16, FLOAT32, SIGNED32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS; GpuAcc (NHWC, NCHW): FLOAT16, FLOAT32, SIGNED32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS |
-| DequantizeLayer | Dequantizes the values in a tensor. | DEQUANTIZE | CpuRef (All): QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16; CpuAcc (All): FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS, QSYMMS8, QSYMMS16; GpuAcc (All): FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS, QSYMMS8, QSYMMS16 |
-| DetectionPostProcessLayer | Generates the detection output from center-size encoded boxes, class predictions and anchors using non-maximum suppression (NMS). | DETECTION_POSTPROCESSING | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT32 |
-| DivisionLayer | Divides two tensors. | DIV | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): FLOAT16, FLOAT32; GpuAcc (All): FLOAT16, FLOAT32 |
-| ElementwiseBaseLayer | Base layer for elementwise Add, Div, Max, Min and Mul operations. | ADD, DIV, MAXIMUM, MINIMUM, MUL | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32 |
-| ElementwiseUnaryLayer | Performs Rsqrt, Exp, Neg, Log, Abs, Sin and Sqrt operations. | ABS, EXP, LOG, NEG, RSQRT, SIN, SQRT | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): FLOAT16, FLOAT32, SIGNED32; GpuAcc (All): FLOAT16, FLOAT32 |
-| FakeQuantizationLayer | Quantizes float values and dequantizes afterwards; the current implementation does not dequantize the values. | N/A | CpuRef (All): FLOAT32 |
-| FillLayer | Sets the values of a tensor to a given value. | FILL | CpuRef (All): FLOAT16, FLOAT32, SIGNED32; CpuAcc (All): All; GpuAcc (All): All |
-| FloorLayer | Rounds each value down to the nearest whole number. | FLOOR | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32; CpuAcc (All): FLOAT32, FLOAT16; GpuAcc (All): FLOAT32, FLOAT16 |
-| FullyConnectedLayer | Performs a fully connected / dense operation. | FULLY_CONNECTED | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8; GpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8 |
-| GatherLayer | Performs a gather operation along the chosen axis. | GATHER | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): All; GpuAcc (All): All |
-| InputLayer | Special layer used to provide input data to the computational network. | N/A | All backends (All): All |
-| InstanceNormalizationLayer | Performs instance normalization on a given axis. | INSTANCE_NORMALIZATION | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32; CpuAcc (NHWC, NCHW): FLOAT16, FLOAT32; GpuAcc (NHWC, NCHW): FLOAT16, FLOAT32 |
-| L2NormalizationLayer | Performs L2 normalization on a given axis. | L2_NORMALIZATION | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): FLOAT16, FLOAT32; GpuAcc (NHWC, NCHW): FLOAT16, FLOAT32 |
-| LogSoftmaxLayer | Computes log-softmax activations given logits. | N/A | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32; CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32 |
-| LogicalBinaryLayer | Performs Logical AND, Logical NOT and Logical OR operations. | LOGICAL_AND, LOGICAL_NOT, LOGICAL_OR | CpuRef (All): BOOLEAN; CpuAcc (All): BOOLEAN; GpuAcc (All): BOOLEAN |
-| LstmLayer | Performs a single time step of a Long Short-Term Memory (LSTM) operation. | LSTM | CpuRef (All): BFLOAT16, FLOAT16, QSYMMS16; CpuAcc (All): FLOAT16, FLOAT32; GpuAcc (All): FLOAT16, FLOAT32 |
-| MapLayer | Performs a map operation on a tensor. | N/A | CpuRef (All): All; CpuAcc (All): All; GpuAcc (All): All |
-| MaximumLayer | Computes the elementwise maximum of two tensors. | N/A | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32, SIGNED32; GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, FLOAT16, FLOAT32, SIGNED32 |
-| MeanLayer | Performs a reduce-mean operation. | MEAN | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32 |
-| MemCopyLayer | Performs a memory copy operation. | N/A | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, BOOLEAN; CpuAcc (All): All; GpuAcc (All): All |
-| MemImportLayer | Performs a memory import operation. | N/A | CpuRef (All): All; CpuAcc (All): All; GpuAcc (All): All |
-| MergeLayer | Concatenates tensors along a given axis. | CONCATENATION | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32 |
-| MinimumLayer | Computes the elementwise minimum of two tensors. | MINIMUM | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, FLOAT16, FLOAT32, SIGNED32 |
-| MultiplicationLayer | Computes the elementwise multiplication of two tensors. | MUL | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32 |
-| NormalizationLayer | Computes a normalization operation. | LOCAL_RESPONSE_NORMALIZATION | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): FLOAT32, FLOAT16; GpuAcc (NHWC, NCHW): FLOAT32, FLOAT16 |
-| OutputLayer | Special layer providing access to a user-supplied buffer into which the output of a network can be written. | N/A | All backends (All): All |
-| PadLayer | Pads a tensor. | PAD, PAD_V2 | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): All; GpuAcc (NHWC, NCHW): All |
-| PermuteLayer | Transposes an ND tensor. | TRANSPOSE | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): All; GpuAcc (NHWC, NCHW): All |
-| Pooling2dLayer | Performs pooling with the specified pooling operation. | AVERAGE_POOL_2D, L2_POOL_2D, MAX_POOL_2D | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32; GpuAcc (NHWC, NCHW): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32 |
-| PreCompiledLayer | Opaque layer provided by a backend; an executable representation of a subgraph from the original network. | N/A | N/A |
-| PreluLayer | Computes the activation layer with the PRELU activation function. | PRELU | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32 |
-| QLstmLayer | Performs a quantized LSTM (Long Short-Term Memory) operation. | QUANTIZED_LSTM, QUANTIZED_16BIT_LSTM | CpuRef (All): All; CpuAcc (All): QASYMMS8, QASYMMU8, SIGNED32, QSYMMS16; GpuAcc (All): QASYMMS8, QASYMMU8, SIGNED32, QSYMMS16 |
-| QuantizeLayer | Performs a quantization operation. | QUANTIZE | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16; CpuAcc (All): QASYMMU8, QASYMMS8, QASYMM16, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, QASYMM16, FLOAT16, FLOAT32 |
-| QuantizedLstmLayer | Performs a quantized LSTM (Long Short-Term Memory) operation. | QUANTIZED_LSTM, QUANTIZED_16BIT_LSTM | CpuRef (All): All; CpuAcc (All): SIGNED32, QASYMMU8, QSYMMS16; GpuAcc (All): SIGNED32, QASYMMU8, QSYMMS16 |
-| RankLayer | Performs a rank operation. | RANK | CpuRef (All): All; CpuAcc (All): All; GpuAcc (All): All |
-| ReduceLayer | Performs a reduction: ARG_IDX_MAX, ARG_IDX_MIN, MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN or MAX. | REDUCE_MAX, REDUCE_MIN, REDUCE_SUM, REDUCE_PROD | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32, SIGNED32; GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32, SIGNED32 |
-| ReshapeLayer | Reshapes a tensor. | RESHAPE, SQUEEZE, EXPAND_DIMS | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32, BOOLEAN; CpuAcc (All): All; GpuAcc (All): All |
-| ResizeLayer | Resizes a tensor using Bilinear or Nearest Neighbor interpolation. | RESIZE_BILINEAR, RESIZE_NEAREST_NEIGHBOR | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32; GpuAcc (NHWC, NCHW): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32 |
-| RsqrtLayer | Performs a reciprocal square root (Rsqrt) operation. | RSQRT | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): FLOAT16, FLOAT32, SIGNED32; GpuAcc (All): FLOAT16, FLOAT32 |
-| ShapeLayer | Returns the shape of the input tensor. | N/A | CpuRef (All): All; CpuAcc (All): All; GpuAcc (All): All |
-| SliceLayer | Performs tensor slicing. | SLICE | CpuRef (All): BFLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): All; GpuAcc (All): All |
-| SoftmaxLayer | Performs a softmax or log-softmax operation over the specified axis. | LOG_SOFTMAX, SOFTMAX | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16; CpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, FLOAT16, FLOAT32 |
-| SpaceToBatchNdLayer | Divides the spatial dimensions of a tensor into a grid of blocks and interleaves these blocks with the batch dimension. | SPACE_TO_BATCH_ND | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): All; GpuAcc (NHWC, NCHW): All |
-| SpaceToDepthLayer | Rearranges blocks of spatial data into depth. | SPACE_TO_DEPTH | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (NHWC, NCHW): All; GpuAcc (NHWC, NCHW): All |
-| SplitterLayer | Splits a tensor along a given axis. | SPLIT | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): All; GpuAcc (All): All |
-| StackLayer | Stacks tensors along an axis. | N/A | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): All; GpuAcc (All): All |
-| StandInLayer | Represents "unknown" or "unsupported" operations in the input graph; it has a configurable number of input and output slots and an optional name. | N/A | N/A |
-| StridedSliceLayer | Extracts a strided slice of a tensor. | STRIDED_SLICE | CpuRef (All): BFLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): All; GpuAcc (All): All |
-| SubtractionLayer | Performs an elementwise subtraction of two tensors. | SUB | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16, SIGNED32; CpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32; GpuAcc (All): QASYMMU8, QASYMMS8, QSYMMS16, SIGNED32, FLOAT16, FLOAT32 |
-| TransposeConvolution2dLayer | Performs a 2D transpose convolution (deconvolution) operation. | TRANSPOSE_CONV_2D | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS8, QSYMMS16; CpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS; GpuAcc (NHWC, NCHW): SIGNED32, FLOAT16, FLOAT32, QASYMMU8, QASYMMS8, QUANTIZEDSYMM8PERAXIS |
-| TransposeLayer | Transposes a tensor. | TRANSPOSE | CpuRef (All): BFLOAT16, FLOAT16, FLOAT32, QASYMMS8, QASYMMU8, QSYMMS16; CpuAcc (All): All; GpuAcc (All): All |
-| UnidirectionalSequenceLstmLayer | Performs a unidirectional sequence LSTM operation. | UNIDIRECTIONAL_SEQUENCE_LSTM | CpuRef (All): input types FLOAT32; weight types FLOAT32, QASYMMS8 |
-| UnmapLayer | Performs an unmap operation on a tensor. | N/A | CpuRef (All): All; CpuAcc (NHWC, NCHW): All; GpuAcc (NHWC, NCHW): All |
-
-*/
-} // namespace
\ No newline at end of file
diff --git a/docs/06_00_software_tools.dox b/docs/06_00_software_tools.dox
new file mode 100644
index 0000000000..e560f44882
--- /dev/null
+++ b/docs/06_00_software_tools.dox
@@ -0,0 +1,46 @@
+/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved.
+///
+/// SPDX-License-Identifier: MIT
+///
+
+namespace armnn
+{
+/**
+@page swtools Software Tools
+
+On this page you can find all software tools contained in the Arm NN repository. You will find links to how-to guides and
+other helpful information in each section.
+
+ - @subpage delegate
+ - @subpage parsers
+ - @subpage md_python_pyarmnn_README
+ - @subpage serializer
+ - @subpage deserializer
+ - @subpage md_src_armnnConverter_README
+ - @subpage md_tests_ImageCSVFileGenerator_README
+ - @subpage md_tests_ImageTensorGenerator_README
+ - @subpage md_tests_ModelAccuracyTool-Armnn_README
+**/
+}
+
+
+/// Create pages for each tool so they appear nicely in the doxygen tree-view. Subpages are not listed there.
+///
+/// Note: The parser, serializer and deserializer pages are created in 06_01_parsers.dox or 06_02_deserializer_serializer.dox
+namespace armnn
+{
+/**
+
+@page md_python_pyarmnn_README PyArmNN
+
+@page md_src_armnnConverter_README Converter
+
+@page md_tests_ModelAccuracyTool-Armnn_README ModelAccuracyTool
+
+@page md_tests_ImageCSVFileGenerator_README ImageCSVFileGenerator
+
+@page md_tests_ImageTensorGenerator_README ImageTensorGenerator
+
+**/
+}
+
diff --git a/docs/06_01_parsers.dox b/docs/06_01_parsers.dox
new file mode 100644
index 0000000000..186ed6193a
--- /dev/null
+++ b/docs/06_01_parsers.dox
@@ -0,0 +1,206 @@
+/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved.
+///
+/// SPDX-License-Identifier: MIT
+///
+
+namespace armnn
+{
+/**
+@page parsers Parsers
+
+@tableofcontents
+Execute models from different machine learning platforms efficiently with our parsers. Simply choose a parser according
+to the model you want to run, e.g. if you have a model in ONNX format (.onnx), use our ONNX parser.
+
+If you would like to run a Tensorflow Lite (TfLite) model you probably also want to take a look at our @ref delegate.
+
+All parsers are written in C++ but it is also possible to use them in Python. For more information on our Python
+bindings take a look into the @ref md_python_pyarmnn_README section.
+
+<br/><br/><br/><br/>
+
+@section S5_onnx_parser Arm NN Onnx Parser
+
+`armnnOnnxParser` is a library for loading neural networks defined in ONNX protobuf files into the Arm NN runtime.
+
+## ONNX operators that the Arm NN SDK supports
+
+This reference guide provides a list of ONNX operators the Arm NN SDK currently supports.
+
+The Arm NN SDK ONNX parser currently only supports fp32 operators.
+
+### Fully supported
+
+- Add: see the ONNX [Add documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Add) for more information.
+- AveragePool: see the ONNX [AveragePool documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#AveragePool) for more information.
+- Concat: see the ONNX [Concat documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Concat) for more information.
+- Constant: see the ONNX [Constant documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Constant) for more information.
+- Clip: see the ONNX [Clip documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Clip) for more information.
+- Flatten: see the ONNX [Flatten documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Flatten) for more information.
+- Gather: see the ONNX [Gather documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Gather) for more information.
+- GlobalAveragePool: see the ONNX [GlobalAveragePool documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#GlobalAveragePool) for more information.
+- LeakyRelu: see the ONNX [LeakyRelu documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#LeakyRelu) for more information.
+- MaxPool: see the ONNX [MaxPool documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#MaxPool) for more information.
+- Relu: see the ONNX [Relu documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Relu) for more information.
+- Reshape: see the ONNX [Reshape documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Reshape) for more information.
+- Shape: see the ONNX [Shape documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Shape) for more information.
+- Sigmoid: see the ONNX [Sigmoid documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Sigmoid) for more information.
+- Tanh: see the ONNX [Tanh documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Tanh) for more information.
+- Unsqueeze: see the ONNX [Unsqueeze documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Unsqueeze) for more information.
+
+### Partially supported
+
+- Conv: the parser only supports 2D convolutions with a group of 1, or a group equal to the number of channels (depthwise convolution).
+- BatchNormalization: the parser does not support training mode. See the ONNX [BatchNormalization documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#BatchNormalization) for more information.
+- Gemm: the parser only supports a constant bias, or a non-constant bias whose bias dimension is 1. See the ONNX [Gemm documentation](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Gemm) for more information.
+- MatMul: the parser only supports constant weights in a fully connected layer.
+
+## Tested networks
+
+Arm tested these operators with the following ONNX fp32 neural networks:
+- Mobilenet_v2. See the ONNX [MobileNet documentation](https://github.com/onnx/models/tree/master/vision/classification/mobilenet) for more information.
+- Simple MNIST. This is no longer directly documented by ONNX. The model and test data may be downloaded [from the ONNX model zoo](https://onnxzoo.blob.core.windows.net/models/opset_8/mnist/mnist.tar.gz).
+
+More machine learning operators will be supported in future releases.
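As a usage illustration, here is a minimal sketch of loading an ONNX model with `armnnOnnxParser` and
handing it to an Arm NN runtime. The file name `model.onnx` is a placeholder; error handling is omitted.

```cpp
#include <armnn/ArmNN.hpp>
#include <armnnOnnxParser/IOnnxParser.hpp>

#include <utility>
#include <vector>

int main()
{
    // Parse the ONNX file into an Arm NN network (fp32 operators only, see above).
    auto parser = armnnOnnxParser::IOnnxParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile("model.onnx");

    // Optimize for a backend and load the result onto a runtime.
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
    armnn::IOptimizedNetworkPtr optNet =
        armnn::Optimize(*network, backends, runtime->GetDeviceSpec());

    armnn::NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optNet));
    return 0;
}
```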



+
+@section S6_tf_lite_parser Arm NN Tf Lite Parser
+
+`armnnTfLiteParser` is a library for loading neural networks defined by TensorFlow Lite FlatBuffers files
+into the Arm NN runtime.
+
+## TensorFlow Lite operators that the Arm NN SDK supports
+
+This reference guide provides a list of TensorFlow Lite operators the Arm NN SDK currently supports.
+
+### Fully supported
+
+The Arm NN SDK TensorFlow Lite parser currently supports the following operators:
+
+- ABS
+- ADD
+- ARG_MAX
+- ARG_MIN
+- AVERAGE_POOL_2D, supported fused activations: RELU, RELU6, TANH, NONE
+- BATCH_TO_SPACE
+- CONCATENATION, supported fused activations: RELU, RELU6, TANH, NONE
+- CONV_2D, supported fused activations: RELU, RELU6, TANH, NONE
+- CONV_3D, supported fused activations: RELU, RELU6, TANH, NONE
+- DEPTH_TO_SPACE
+- DEPTHWISE_CONV_2D, supported fused activations: RELU, RELU6, TANH, NONE
+- DEQUANTIZE
+- DIV
+- ELU
+- EQUAL
+- EXP
+- EXPAND_DIMS
+- FULLY_CONNECTED, supported fused activations: RELU, RELU6, TANH, NONE
+- GATHER
+- GREATER
+- GREATER_EQUAL
+- HARD_SWISH
+- LEAKY_RELU
+- LESS
+- LESS_EQUAL
+- LOGICAL_NOT
+- LOGISTIC
+- L2_NORMALIZATION
+- MAX_POOL_2D, supported fused activations: RELU, RELU6, TANH, NONE
+- MAXIMUM
+- MEAN
+- MINIMUM
+- MIRROR_PAD
+- MUL
+- NEG
+- NOT_EQUAL
+- PACK
+- PAD
+- PRELU
+- QUANTIZE
+- RELU
+- RELU6
+- REDUCE_MAX
+- REDUCE_MIN
+- RESHAPE
+- RESIZE_BILINEAR
+- RESIZE_NEAREST_NEIGHBOR
+- RSQRT
+- SHAPE
+- SLICE
+- SOFTMAX
+- SPACE_TO_BATCH
+- SPLIT
+- SPLIT_V
+- SQUEEZE
+- STRIDED_SLICE
+- SUB
+- SUM
+- TANH
+- TRANSPOSE
+- TRANSPOSE_CONV
+- UNPACK
+
+### Custom Operator
+
+- TFLite_Detection_PostProcess
+
+## Tested networks
+
+Arm tested these operators with the following TensorFlow Lite neural networks:
+- [Quantized MobileNet](http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224_quant.tgz)
+- [Quantized SSD MobileNet](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18.tar.gz)
+- DeepSpeech v1 converted from [TensorFlow model](https://github.com/mozilla/DeepSpeech/releases/tag/v0.4.1)
+- DeepSpeaker
+- [DeepLab v3+](https://www.tensorflow.org/lite/models/segmentation/overview)
+- FSRCNN
+- EfficientNet-lite
+- RDN converted from [TensorFlow model](https://github.com/hengchuan/RDN-TensorFlow)
+- Quantized RDN (CpuRef)
+- [Quantized Inception v3](http://download.tensorflow.org/models/tflite_11_05_08/inception_v3_quant.tgz)
+- [Quantized Inception v4](http://download.tensorflow.org/models/inception_v4_299_quant_20181026.tgz) (CpuRef)
+- Quantized ResNet v2 50 (CpuRef)
+- Quantized Yolo v3 (CpuRef)
+
+More machine learning operators will be supported in future releases.
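A minimal sketch of using `armnnTfLiteParser` to parse a model and inspect its input bindings follows.
The file name `model.tflite` is a placeholder; error handling is omitted.

```cpp
#include <armnnTfLiteParser/ITfLiteParser.hpp>

#include <iostream>
#include <string>
#include <vector>

int main()
{
    // Parse a .tflite FlatBuffers file into an Arm NN network.
    auto parser = armnnTfLiteParser::ITfLiteParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile("model.tflite");

    // Query the input bindings of the first subgraph; each binding carries the
    // layer binding id and TensorInfo needed to feed data at inference time.
    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);
    for (const std::string& name : inputNames)
    {
        armnnTfLiteParser::BindingPointInfo binding =
            parser->GetNetworkInputBindingInfo(0, name);
        std::cout << name << " has " << binding.second.GetNumElements()
                  << " elements\n";
    }
    return 0;
}
```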
+
+**/
+}
+
diff --git a/docs/06_02_deserializer_serializer.dox b/docs/06_02_deserializer_serializer.dox
new file mode 100644
index 0000000000..5d4dc43a74
--- /dev/null
+++ b/docs/06_02_deserializer_serializer.dox
@@ -0,0 +1,184 @@
+/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved.
+///
+/// SPDX-License-Identifier: MIT
+///
+
+namespace armnn
+{
+/**
+@page serializer Serializer
+@tableofcontents
+
+The `armnnSerializer` is a library for serializing an Arm NN network to a stream.
+
+@section serializersupport Supported Layers
+
+This reference guide provides a list of layers which can be serialized by the Arm NN SDK.
+
+@subsection serializersupportfully Fully supported
+
+The Arm NN SDK Serializer currently supports the following layers:
+
+- Activation
+- Addition
+- ArgMinMax
+- BatchToSpaceNd
+- BatchNormalization
+- Cast
+- ChannelShuffle
+- Comparison
+- Concat
+- Constant
+- Convolution2d
+- Convolution3d
+- DepthToSpace
+- DepthwiseConvolution2d
+- Dequantize
+- DetectionPostProcess
+- Division
+- ElementwiseUnary
+- Fill
+- Floor
+- FullyConnected
+- Gather
+- Input
+- InstanceNormalization
+- L2Normalization
+- Logical
+- LogSoftmax
+- Lstm
+- Maximum
+- Mean
+- Merge
+- Minimum
+- Multiplication
+- Normalization
+- Output
+- Pad (Constant, Symmetric, Reflect)
+- Permute
+- Pooling2d
+- Prelu
+- QLstm
+- Quantize
+- QuantizedLstm
+- Rank
+- Reduce
+- Reshape
+- Resize
+- Shape
+- Slice
+- Softmax
+- SpaceToBatchNd
+- SpaceToDepth
+- Splitter
+- Stack
+- StandIn
+- StridedSlice
+- Subtraction
+- Switch
+- Transpose
+- TransposeConvolution2d
+- UnidirectionalSequenceLstm
+
+More machine learning layers will be supported in future releases.
+
+@subsection serializersupportdeprecated Deprecated layers
+
+Some layers have been deprecated and replaced by other layers. In order to maintain backward compatibility, serializations of these deprecated layers will deserialize to the layers that have replaced them, as follows:
+
+- Abs will deserialize as ElementwiseUnary
+- Equal will deserialize as Comparison
+- Greater will deserialize as Comparison
+- Merger will deserialize as Concat
+- ResizeBilinear will deserialize as Resize
+- Rsqrt will deserialize as ElementwiseUnary
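To illustrate how the serializer and deserializer fit together, here is a minimal round-trip sketch:
a trivial, invented network is serialized to a stream and then loaded back. Error handling is omitted.

```cpp
#include <armnn/ArmNN.hpp>
#include <armnnDeserializer/IDeserializer.hpp>
#include <armnnSerializer/ISerializer.hpp>

#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

int main()
{
    // A trivial network: one input connected straight to one output.
    armnn::INetworkPtr network = armnn::INetwork::Create();
    armnn::IConnectableLayer* input  = network->AddInputLayer(0);
    armnn::IConnectableLayer* output = network->AddOutputLayer(0);
    input->GetOutputSlot(0).SetTensorInfo(
        armnn::TensorInfo(armnn::TensorShape({1, 4}), armnn::DataType::Float32));
    input->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Serialize the network to a stream.
    auto serializer = armnnSerializer::ISerializer::Create();
    serializer->Serialize(*network);
    std::stringstream stream;
    serializer->SaveSerializedToStream(stream);

    // Deserialize it back into a new INetwork.
    std::string data = stream.str();
    std::vector<uint8_t> binary(data.begin(), data.end());
    auto deserializer = armnnDeserializer::IDeserializer::Create();
    armnn::INetworkPtr restored = deserializer->CreateNetworkFromBinary(binary);
    return 0;
}
```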



+
+@page deserializer Deserializer
+@tableofcontents
+
+The `armnnDeserializer` is a library for loading neural networks defined by Arm NN FlatBuffers files
+into the Arm NN runtime.
+
+@section deserializersupport Supported Layers
+
+This reference guide provides a list of layers which can be deserialized by the Arm NN SDK.
+
+@subsection deserializersupportfully Fully supported
+
+The Arm NN SDK Deserializer currently supports the following layers:
+
+- Abs
+- Activation
+- Addition
+- ArgMinMax
+- BatchToSpaceNd
+- BatchNormalization
+- Cast
+- ChannelShuffle
+- Concat
+- Comparison
+- Constant
+- Convolution2d
+- DepthToSpace
+- DepthwiseConvolution2d
+- Dequantize
+- DetectionPostProcess
+- Division
+- ElementwiseUnary
+- Fill
+- Floor
+- FullyConnected
+- Gather
+- Input
+- InstanceNormalization
+- L2Normalization
+- Logical
+- LogSoftmax
+- Lstm
+- Maximum
+- Mean
+- Merge
+- Minimum
+- Multiplication
+- Normalization
+- Output
+- Pad
+- Permute
+- Pooling2d
+- Prelu
+- Quantize
+- QLstm
+- QuantizedLstm
+- Rank
+- Reduce
+- Reshape
+- Resize
+- ResizeBilinear
+- Slice
+- Softmax
+- SpaceToBatchNd
+- SpaceToDepth
+- Splitter
+- Stack
+- StandIn
+- StridedSlice
+- Subtraction
+- Switch
+- Transpose
+- TransposeConvolution2d
+- UnidirectionalSequenceLstm
+
+More machine learning layers will be supported in future releases.
+
+@subsection deserializersupportdeprecated Deprecated layers
+
+Some layers have been deprecated and replaced by other layers. In order to maintain backward compatibility, serializations of these deprecated layers will deserialize to the layers that have replaced them, as follows:
+
+- Equal will deserialize as Comparison
+- Merger will deserialize as Concat
+- Greater will deserialize as Comparison
+- ResizeBilinear will deserialize as Resize
+
+**/
+}
\ No newline at end of file
diff --git a/docs/06_03_delegate.dox b/docs/06_03_delegate.dox
new file mode 100644
index 0000000000..b3caf8cbf8
--- /dev/null
+++ b/docs/06_03_delegate.dox
@@ -0,0 +1,178 @@
+/// Copyright (c) 2021 ARM Limited and Contributors. All rights reserved.
+///
+/// SPDX-License-Identifier: MIT
+///
+
+namespace armnn
+{
+/**
+@page delegate TfLite Delegate
+@tableofcontents
+
+
+@section delegateintro About the delegate
+'armnnDelegate' is a library for accelerating certain TensorFlow Lite (TfLite) operators on Arm hardware. It can be
+integrated into TfLite using its delegation mechanism. TfLite will then delegate the execution of operators supported by
+Arm NN to Arm NN.
+
+The main difference to our @ref S6_tf_lite_parser is the number of operators you can run with it. If none of the active
+backends support an operation in your model, you won't be able to execute it with our parser. In contrast, TfLite
+delegates an operation to the armnnDelegate only if the delegate supports it, and executes the operation itself
+otherwise. In other words, every TfLite model can be executed, and every operation in your model that we can
+accelerate will be accelerated. That is the reason why the armnnDelegate is our recommended way to accelerate TfLite models.
+
+If you need help building the armnnDelegate, please take a look at our [build guide](delegate/BuildGuideNative.md).
+An example of how to set up TfLite to integrate the armnnDelegate can be found in this
+guide: [Integrate the delegate into python](delegate/IntegrateDelegateIntoPython.md)
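For readers who want the C++ side of that integration, here is a minimal sketch of registering the
delegate with a TfLite interpreter. It assumes the delegate library and TfLite headers are on the
include path; `model.tflite` is a placeholder and error handling is omitted.

```cpp
#include <armnn_delegate.hpp>

#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>

#include <memory>
#include <vector>

int main()
{
    // Build a standard TfLite interpreter for the model.
    auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
    tflite::ops::builtin::BuiltinOpResolver resolver;
    std::unique_ptr<tflite::Interpreter> interpreter;
    tflite::InterpreterBuilder(*model, resolver)(&interpreter);

    // Create the Arm NN delegate, preferring the CpuAcc (Neon) backend.
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnnDelegate::DelegateOptions delegateOptions(backends);
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                         armnnDelegate::TfLiteArmnnDelegateDelete);

    // Operators Arm NN supports run through the delegate; the rest stay on TfLite.
    interpreter->ModifyGraphWithDelegate(theArmnnDelegate.get());

    interpreter->AllocateTensors();
    interpreter->Invoke();
    return 0;
}
```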
+
+
+@section delegatesupport Supported Operators
+This reference guide provides a list of TensorFlow Lite operators the Arm NN SDK currently supports.
+
+@subsection delegatefullysupported Fully supported
+
+The Arm NN SDK TensorFlow Lite delegate currently supports the following operators:
+
+- ABS
+- ADD
+- ARGMAX
+- ARGMIN
+- AVERAGE_POOL_2D, supported fused activations: RELU, RELU6, TANH, NONE
+- BATCH_TO_SPACE_ND
+- CAST
+- CONCATENATION, supported fused activations: RELU, RELU6, TANH, NONE
+- CONV_2D, supported fused activations: RELU, RELU6, TANH, NONE
+- CONV_3D, supported fused activations: RELU, RELU6, TANH, NONE
+- DEPTH_TO_SPACE
+- DEPTHWISE_CONV_2D, supported fused activations: RELU, RELU6, TANH, NONE
+- DEQUANTIZE
+- DIV
+- EQUAL
+- ELU
+- EXP
+- FULLY_CONNECTED, supported fused activations: RELU, RELU6, TANH, NONE
+- FLOOR
+- GATHER
+- GREATER
+- GREATER_OR_EQUAL
+- HARD_SWISH
+- LESS
+- LESS_OR_EQUAL
+- LOCAL_RESPONSE_NORMALIZATION
+- LOGICAL_AND
+- LOGICAL_NOT
+- LOGICAL_OR
+- LOGISTIC
+- LOG_SOFTMAX
+- LSTM
+- L2_NORMALIZATION
+- L2_POOL_2D
+- MAXIMUM
+- MAX_POOL_2D, supported fused activations: RELU, RELU6, TANH, NONE
+- MEAN
+- MINIMUM
+- MIRROR_PAD
+- MUL
+- NEG
+- NOT_EQUAL
+- PACK
+- PAD
+- PRELU
+- QUANTIZE
+- RANK
+- REDUCE_MAX
+- REDUCE_MIN
+- RESHAPE
+- RESIZE_BILINEAR
+- RESIZE_NEAREST_NEIGHBOR
+- RELU
+- RELU6
+- RSQRT
+- SHAPE
+- SOFTMAX
+- SPACE_TO_BATCH_ND
+- SPACE_TO_DEPTH
+- SPLIT
+- SPLIT_V
+- SQRT
+- STRIDED_SLICE
+- SUB
+- SUM
+- TANH
+- TRANSPOSE
+- TRANSPOSE_CONV
+- UNIDIRECTIONAL_SEQUENCE_LSTM
+- UNPACK
+
+More machine learning operators will be supported in future releases.
+**/
+}
\ No newline at end of file
diff --git a/docs/Doxyfile b/docs/Doxyfile
index 3fc872eda2..1ddb3e3f08 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -813,14 +813,16 @@ WARN_LOGFILE =
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = ./docs/01_00_software_tools.dox \
-                         ./docs/01_01_parsers.dox \
-                         ./docs/01_03_delegate.dox \
-                         ./docs/01_02_deserializer_serializer.dox \
-                         ./docs/02_build_guides.dox \
-                         ./docs/03_use_guides.dox \
-                         ./docs/04_contributor.dox \
-                         ./docs/05_operator_list.dox \
+INPUT                  = ./docs/01_00_quick_start.dox \
+                         ./docs/01_01_delegate_start_guide.dox \
+                         ./docs/02_operator_list.dox \
+                         ./docs/03_build_guides.dox \
+                         ./docs/04_use_guides.dox \
+                         ./docs/05_contributor.dox \
+                         ./docs/06_00_software_tools.dox \
+                         ./docs/06_01_parsers.dox \
+                         ./docs/06_02_deserializer_serializer.dox \
+                         ./docs/06_03_delegate.dox \
                          ./docs/FAQ.md \
                          ./tests/ImageCSVFileGenerator/README.md \
                          ./tests/ImageTensorGenerator/README.md \
                          ./tests/ModelAccuracyTool-Armnn/README.md \
@@ -835,7 +837,7 @@ INPUT                  = ./docs/01_00_software_tools.dox \
                          ./delegate/include \
                          ./delegate/src/armnn_external_delegate.cpp \
                          ./delegate/BuildGuideNative.md \
-                         ./delegate/IntegrateDelegateIntoPython.md \
+                         ./delegate/DelegateQuickStartGuide.md \
                          ./README.md \
                          ./InstallationViaAptRepository.md \
                          ./ContributorGuide.md \
--
cgit v1.2.1