author     telsoa01 <telmo.soares@arm.com>  2018-03-09 13:51:08 +0000
committer  telsoa01 <telmo.soares@arm.com>  2018-03-09 14:05:45 +0000
commit     5307bc10ac488261e84ac76b2dede6039ea3fe96 (patch)
tree       09de3cc29026ca9722179f6beb25b9a66efcf88e
Release 18.02
Change-Id: I41a89c149534a7c354a58e2c66a32cba572fc0c1
-rw-r--r--  .gitignore                                              6
-rw-r--r--  Android.bp                                            608
-rw-r--r--  Android.mk                                            128
-rw-r--r--  ArmnnDriver.cpp                                       429
-rw-r--r--  ArmnnDriver.hpp                                        61
-rw-r--r--  ArmnnPreparedModel.cpp                                353
-rw-r--r--  ArmnnPreparedModel.hpp                                 83
-rw-r--r--  LICENSE                                                21
-rw-r--r--  ModelToINetworkConverter.cpp                         1848
-rw-r--r--  ModelToINetworkConverter.hpp                          156
-rw-r--r--  README.md                                              58
-rw-r--r--  ReleaseNotes.txt                                       59
-rw-r--r--  RequestThread.cpp                                     116
-rw-r--r--  RequestThread.hpp                                     104
-rw-r--r--  SystemPropertiesUtils.hpp                              83
-rw-r--r--  Utils.cpp                                             273
-rw-r--r--  Utils.hpp                                              51
-rw-r--r--  android.hardware.neuralnetworks@1.0-service-armnn.rc    4
-rw-r--r--  service.cpp                                            32
-rwxr-xr-x  setup.sh                                               48
-rw-r--r--  test/Android.mk                                        68
-rwxr-xr-x  test/Tests.cpp                                        978
22 files changed, 5567 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..18a48d24
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+armnn
+boost_1_64_0
+clframework
+prebuilt
+.vscode/settings.json
+.gitignore
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 00000000..0daab064
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,608 @@
+//
+// Copyright © 2017 ARM Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+////////////////////////////////////////////
+// //
+// Arm Compute //
+// //
+////////////////////////////////////////////
+cc_library_static {
+ name: "armnn-arm_compute",
+ proprietary: true,
+ local_include_dirs: ["clframework/build/android-arm64v8a/src/core",
+ "clframework/build/android-arm64v8a/src/core/CL",
+ "clframework/arm_compute/core/NEON/kernels/assembly"],
+ export_include_dirs: ["clframework", "clframework/include"],
+ srcs: [
+ "clframework/src/core/AccessWindowAutoPadding.cpp",
+ "clframework/src/core/AccessWindowStatic.cpp",
+ "clframework/src/core/AccessWindowTranspose.cpp",
+ "clframework/src/core/CL/CLHelpers.cpp",
+ "clframework/src/core/CL/CLKernelLibrary.cpp",
+ "clframework/src/core/CL/ICLDistribution1D.cpp",
+ "clframework/src/core/CL/ICLHOG.cpp",
+ "clframework/src/core/CL/ICLKernel.cpp",
+ "clframework/src/core/CL/ICLLut.cpp",
+ "clframework/src/core/CL/ICLMultiHOG.cpp",
+ "clframework/src/core/CL/ICLMultiImage.cpp",
+ "clframework/src/core/CL/ICLSimple2DKernel.cpp",
+ "clframework/src/core/CL/ICLSimple3DKernel.cpp",
+ "clframework/src/core/CL/ICLSimpleKernel.cpp",
+ "clframework/src/core/CL/ICLTensor.cpp",
+ "clframework/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp",
+ "clframework/src/core/CL/kernels/CLAccumulateKernel.cpp",
+ "clframework/src/core/CL/kernels/CLActivationLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp",
+ "clframework/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp",
+ "clframework/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLBitwiseAndKernel.cpp",
+ "clframework/src/core/CL/kernels/CLBitwiseNotKernel.cpp",
+ "clframework/src/core/CL/kernels/CLBitwiseOrKernel.cpp",
+ "clframework/src/core/CL/kernels/CLBitwiseXorKernel.cpp",
+ "clframework/src/core/CL/kernels/CLBox3x3Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLCannyEdgeKernel.cpp",
+ "clframework/src/core/CL/kernels/CLChannelCombineKernel.cpp",
+ "clframework/src/core/CL/kernels/CLChannelExtractKernel.cpp",
+ "clframework/src/core/CL/kernels/CLCol2ImKernel.cpp",
+ "clframework/src/core/CL/kernels/CLColorConvertKernel.cpp",
+ "clframework/src/core/CL/kernels/CLConvolutionKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDequantizationLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDerivativeKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDilateKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp",
+ "clframework/src/core/CL/kernels/CLErodeKernel.cpp",
+ "clframework/src/core/CL/kernels/CLFastCornersKernel.cpp",
+ "clframework/src/core/CL/kernels/CLFillBorderKernel.cpp",
+ "clframework/src/core/CL/kernels/CLFloorKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGaussian3x3Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLGaussian5x5Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLGaussianPyramidKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp",
+ "clframework/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp",
+ "clframework/src/core/CL/kernels/CLHarrisCornersKernel.cpp",
+ "clframework/src/core/CL/kernels/CLHistogramKernel.cpp",
+ "clframework/src/core/CL/kernels/CLHOGDescriptorKernel.cpp",
+ "clframework/src/core/CL/kernels/CLHOGDetectorKernel.cpp",
+ "clframework/src/core/CL/kernels/CLIm2ColKernel.cpp",
+ "clframework/src/core/CL/kernels/CLIntegralImageKernel.cpp",
+ "clframework/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLLKTrackerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp",
+ "clframework/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp",
+ "clframework/src/core/CL/kernels/CLMeanStdDevKernel.cpp",
+ "clframework/src/core/CL/kernels/CLMedian3x3Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLMinMaxLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLMinMaxLocationKernel.cpp",
+ "clframework/src/core/CL/kernels/CLNonLinearFilterKernel.cpp",
+ "clframework/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLNormalizationLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLPermuteKernel.cpp",
+ "clframework/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp",
+ "clframework/src/core/CL/kernels/CLPoolingLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLQuantizationLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLReductionOperationKernel.cpp",
+ "clframework/src/core/CL/kernels/CLRemapKernel.cpp",
+ "clframework/src/core/CL/kernels/CLReshapeLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLScaleKernel.cpp",
+ "clframework/src/core/CL/kernels/CLScharr3x3Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLSobel3x3Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLSobel5x5Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLSobel7x7Kernel.cpp",
+ "clframework/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp",
+ "clframework/src/core/CL/kernels/CLTableLookupKernel.cpp",
+ "clframework/src/core/CL/kernels/CLThresholdKernel.cpp",
+ "clframework/src/core/CL/kernels/CLTransposeKernel.cpp",
+ "clframework/src/core/CL/kernels/CLWarpAffineKernel.cpp",
+ "clframework/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp",
+ "clframework/src/core/CL/kernels/CLWeightsReshapeKernel.cpp",
+ "clframework/src/core/CL/OpenCL.cpp",
+ "clframework/src/core/CPP/ICPPSimpleKernel.cpp",
+ "clframework/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp",
+ "clframework/src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp",
+ "clframework/src/core/CPP/kernels/CPPPermuteKernel.cpp",
+ "clframework/src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp",
+ "clframework/src/core/Error.cpp",
+ "clframework/src/core/Helpers.cpp",
+ "clframework/src/core/HOGInfo.cpp",
+ "clframework/src/core/IAccessWindow.cpp",
+ "clframework/src/core/IDistribution1D.cpp",
+ "clframework/src/core/IDistribution.cpp",
+ "clframework/src/core/IKernel.cpp",
+ "clframework/src/core/ITensor.cpp",
+ "clframework/src/core/MultiImageInfo.cpp",
+ "clframework/src/core/NEON/kernels/convolution/common/utils.cpp",
+ "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_1x1_fp32_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_2x2_fp32_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp32_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp32_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_1x1_fp32_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_2x2_fp32_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_3x3_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/transforms/input_2x2_5x5_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/transforms/input_4x4_3x3_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/transforms/output_2x2_3x3_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/transforms/output_2x2_5x5_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/transforms/output_4x4_3x3_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/transforms/weights_2x2_3x3_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/transforms/weights_2x2_5x5_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/transforms/weights_4x4_3x3_fp32.cpp",
+ "clframework/src/core/NEON/kernels/convolution/winograd/winograd_gemm.cpp",
+ "clframework/src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEAccumulateKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEActivationLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEBitwiseAndKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEBitwiseNotKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEBitwiseOrKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEBitwiseXorKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEBox3x3Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NECannyEdgeKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEChannelCombineKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEChannelExtractKernel.cpp",
+ "clframework/src/core/NEON/kernels/NECol2ImKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEColorConvertKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEConvolutionKernel.cpp",
+ "clframework/src/core/NEON/kernels/NECumulativeDistributionKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDerivativeKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDilateKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEErodeKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEFastCornersKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEFillArrayKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEFillBorderKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEFillInnerBorderKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEFloorKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGaussian3x3Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGaussian5x5Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGaussianPyramidKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMInterleaveBlockedKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEHarrisCornersKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEHistogramKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEHOGDescriptorKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEHOGDetectorKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEIm2ColKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEIntegralImageKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NELKTrackerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEMeanStdDevKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEMedian3x3Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEMinMaxLocationKernel.cpp",
+ "clframework/src/core/NEON/kernels/NENonLinearFilterKernel.cpp",
+ "clframework/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NENormalizationLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEPermuteKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEPoolingLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEReductionOperationKernel.cpp",
+ "clframework/src/core/NEON/kernels/NERemapKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEReshapeLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEScaleKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEScharr3x3Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NESobel3x3Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NESobel5x5Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NESobel7x7Kernel.cpp",
+ "clframework/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp",
+ "clframework/src/core/NEON/kernels/NETableLookupKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEThresholdKernel.cpp",
+ "clframework/src/core/NEON/kernels/NETransposeKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEWarpKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp",
+ "clframework/src/core/NEON/kernels/NEWinogradLayerKernel.cpp",
+ "clframework/src/core/PyramidInfo.cpp",
+ "clframework/src/core/Rounding.cpp",
+ "clframework/src/core/SubTensorInfo.cpp",
+ "clframework/src/core/TensorInfo.cpp",
+ "clframework/src/core/Utils.cpp",
+ "clframework/src/core/utils/io/FileHandler.cpp",
+ "clframework/src/core/utils/logging/FilePrinter.cpp",
+ "clframework/src/core/utils/logging/Helpers.cpp",
+ "clframework/src/core/utils/logging/Logger.cpp",
+ "clframework/src/core/utils/logging/LoggerRegistry.cpp",
+ "clframework/src/core/utils/quantization/AsymmHelpers.cpp",
+ "clframework/src/core/Validate.cpp",
+ "clframework/src/runtime/Allocator.cpp",
+ "clframework/src/runtime/BlobLifetimeManager.cpp",
+ "clframework/src/runtime/BlobMemoryPool.cpp",
+ "clframework/src/runtime/CL/CLBufferAllocator.cpp",
+ "clframework/src/runtime/CL/CLDistribution1D.cpp",
+ "clframework/src/runtime/CL/CLHOG.cpp",
+ "clframework/src/runtime/CL/CLLutAllocator.cpp",
+ "clframework/src/runtime/CL/CLLut.cpp",
+ "clframework/src/runtime/CL/CLMultiHOG.cpp",
+ "clframework/src/runtime/CL/CLMultiImage.cpp",
+ "clframework/src/runtime/CL/CLPyramid.cpp",
+ "clframework/src/runtime/CL/CLScheduler.cpp",
+ "clframework/src/runtime/CL/CLSubTensor.cpp",
+ "clframework/src/runtime/CL/CLTensorAllocator.cpp",
+ "clframework/src/runtime/CL/CLTensor.cpp",
+ "clframework/src/runtime/CL/CLTuner.cpp",
+ "clframework/src/runtime/CL/functions/CLAbsoluteDifference.cpp",
+ "clframework/src/runtime/CL/functions/CLAccumulate.cpp",
+ "clframework/src/runtime/CL/functions/CLActivationLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLArithmeticAddition.cpp",
+ "clframework/src/runtime/CL/functions/CLArithmeticSubtraction.cpp",
+ "clframework/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLBitwiseAnd.cpp",
+ "clframework/src/runtime/CL/functions/CLBitwiseNot.cpp",
+ "clframework/src/runtime/CL/functions/CLBitwiseOr.cpp",
+ "clframework/src/runtime/CL/functions/CLBitwiseXor.cpp",
+ "clframework/src/runtime/CL/functions/CLBox3x3.cpp",
+ "clframework/src/runtime/CL/functions/CLCannyEdge.cpp",
+ "clframework/src/runtime/CL/functions/CLChannelCombine.cpp",
+ "clframework/src/runtime/CL/functions/CLChannelExtract.cpp",
+ "clframework/src/runtime/CL/functions/CLColorConvert.cpp",
+ "clframework/src/runtime/CL/functions/CLConvolution.cpp",
+ "clframework/src/runtime/CL/functions/CLConvolutionLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLDeconvolutionLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp",
+ "clframework/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLDepthConvertLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLDequantizationLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLDerivative.cpp",
+ "clframework/src/runtime/CL/functions/CLDilate.cpp",
+ "clframework/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLEqualizeHistogram.cpp",
+ "clframework/src/runtime/CL/functions/CLErode.cpp",
+ "clframework/src/runtime/CL/functions/CLFastCorners.cpp",
+ "clframework/src/runtime/CL/functions/CLFillBorder.cpp",
+ "clframework/src/runtime/CL/functions/CLFlattenLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLFloor.cpp",
+ "clframework/src/runtime/CL/functions/CLFullyConnectedLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLGaussian3x3.cpp",
+ "clframework/src/runtime/CL/functions/CLGaussian5x5.cpp",
+ "clframework/src/runtime/CL/functions/CLGaussianPyramid.cpp",
+ "clframework/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLGEMM.cpp",
+ "clframework/src/runtime/CL/functions/CLGEMMInterleave4x4.cpp",
+ "clframework/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp",
+ "clframework/src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp",
+ "clframework/src/runtime/CL/functions/CLGEMMTranspose1xW.cpp",
+ "clframework/src/runtime/CL/functions/CLHarrisCorners.cpp",
+ "clframework/src/runtime/CL/functions/CLHistogram.cpp",
+ "clframework/src/runtime/CL/functions/CLHOGDescriptor.cpp",
+ "clframework/src/runtime/CL/functions/CLHOGDetector.cpp",
+ "clframework/src/runtime/CL/functions/CLHOGGradient.cpp",
+ "clframework/src/runtime/CL/functions/CLHOGMultiDetection.cpp",
+ "clframework/src/runtime/CL/functions/CLIntegralImage.cpp",
+ "clframework/src/runtime/CL/functions/CLL2NormalizeLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLLaplacianPyramid.cpp",
+ "clframework/src/runtime/CL/functions/CLLaplacianReconstruct.cpp",
+ "clframework/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLMagnitude.cpp",
+ "clframework/src/runtime/CL/functions/CLMeanStdDev.cpp",
+ "clframework/src/runtime/CL/functions/CLMedian3x3.cpp",
+ "clframework/src/runtime/CL/functions/CLMinMaxLocation.cpp",
+ "clframework/src/runtime/CL/functions/CLNonLinearFilter.cpp",
+ "clframework/src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp",
+ "clframework/src/runtime/CL/functions/CLNormalizationLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLOpticalFlow.cpp",
+ "clframework/src/runtime/CL/functions/CLPermute.cpp",
+ "clframework/src/runtime/CL/functions/CLPhase.cpp",
+ "clframework/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp",
+ "clframework/src/runtime/CL/functions/CLPoolingLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLQuantizationLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLReductionOperation.cpp",
+ "clframework/src/runtime/CL/functions/CLRemap.cpp",
+ "clframework/src/runtime/CL/functions/CLReshapeLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLROIPoolingLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLScale.cpp",
+ "clframework/src/runtime/CL/functions/CLScharr3x3.cpp",
+ "clframework/src/runtime/CL/functions/CLSobel3x3.cpp",
+ "clframework/src/runtime/CL/functions/CLSobel5x5.cpp",
+ "clframework/src/runtime/CL/functions/CLSobel7x7.cpp",
+ "clframework/src/runtime/CL/functions/CLSoftmaxLayer.cpp",
+ "clframework/src/runtime/CL/functions/CLTableLookup.cpp",
+ "clframework/src/runtime/CL/functions/CLThreshold.cpp",
+ "clframework/src/runtime/CL/functions/CLTranspose.cpp",
+ "clframework/src/runtime/CL/functions/CLWarpAffine.cpp",
+ "clframework/src/runtime/CL/functions/CLWarpPerspective.cpp",
+ "clframework/src/runtime/CL/ICLSimpleFunction.cpp",
+ "clframework/src/runtime/CPP/CPPScheduler.cpp",
+ "clframework/src/runtime/CPP/functions/CPPPermute.cpp",
+ "clframework/src/runtime/CPP/ICPPSimpleFunction.cpp",
+ "clframework/src/runtime/CPP/SingleThreadScheduler.cpp",
+ "clframework/src/runtime/Distribution1D.cpp",
+ "clframework/src/runtime/HOG.cpp",
+ "clframework/src/runtime/ILutAllocator.cpp",
+ "clframework/src/runtime/IScheduler.cpp",
+ "clframework/src/runtime/ISimpleLifetimeManager.cpp",
+ "clframework/src/runtime/ITensorAllocator.cpp",
+ "clframework/src/runtime/LutAllocator.cpp",
+ "clframework/src/runtime/Lut.cpp",
+ "clframework/src/runtime/Memory.cpp",
+ "clframework/src/runtime/MemoryManagerOnDemand.cpp",
+ "clframework/src/runtime/MultiHOG.cpp",
+ "clframework/src/runtime/MultiImage.cpp",
+ "clframework/src/runtime/NEON/functions/NEAbsoluteDifference.cpp",
+ "clframework/src/runtime/NEON/functions/NEAccumulate.cpp",
+ "clframework/src/runtime/NEON/functions/NEActivationLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEArithmeticAddition.cpp",
+ "clframework/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp",
+ "clframework/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEBitwiseAnd.cpp",
+ "clframework/src/runtime/NEON/functions/NEBitwiseNot.cpp",
+ "clframework/src/runtime/NEON/functions/NEBitwiseOr.cpp",
+ "clframework/src/runtime/NEON/functions/NEBitwiseXor.cpp",
+ "clframework/src/runtime/NEON/functions/NEBox3x3.cpp",
+ "clframework/src/runtime/NEON/functions/NECannyEdge.cpp",
+ "clframework/src/runtime/NEON/functions/NEChannelCombine.cpp",
+ "clframework/src/runtime/NEON/functions/NEChannelExtract.cpp",
+ "clframework/src/runtime/NEON/functions/NECol2Im.cpp",
+ "clframework/src/runtime/NEON/functions/NEColorConvert.cpp",
+ "clframework/src/runtime/NEON/functions/NEConvolution.cpp",
+ "clframework/src/runtime/NEON/functions/NEConvolutionLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEDepthConvertLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEDequantizationLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEDerivative.cpp",
+ "clframework/src/runtime/NEON/functions/NEDilate.cpp",
+ "clframework/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEEqualizeHistogram.cpp",
+ "clframework/src/runtime/NEON/functions/NEErode.cpp",
+ "clframework/src/runtime/NEON/functions/NEFastCorners.cpp",
+ "clframework/src/runtime/NEON/functions/NEFillBorder.cpp",
+ "clframework/src/runtime/NEON/functions/NEFlattenLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEFloor.cpp",
+ "clframework/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEGaussian3x3.cpp",
+ "clframework/src/runtime/NEON/functions/NEGaussian5x5.cpp",
+ "clframework/src/runtime/NEON/functions/NEGaussianPyramid.cpp",
+ "clframework/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEGEMM.cpp",
+ "clframework/src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp",
+ "clframework/src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp",
+ "clframework/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp",
+ "clframework/src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp",
+ "clframework/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp",
+ "clframework/src/runtime/NEON/functions/NEHarrisCorners.cpp",
+ "clframework/src/runtime/NEON/functions/NEHistogram.cpp",
+ "clframework/src/runtime/NEON/functions/NEHOGDescriptor.cpp",
+ "clframework/src/runtime/NEON/functions/NEHOGDetector.cpp",
+ "clframework/src/runtime/NEON/functions/NEHOGGradient.cpp",
+ "clframework/src/runtime/NEON/functions/NEHOGMultiDetection.cpp",
+ "clframework/src/runtime/NEON/functions/NEIm2Col.cpp",
+ "clframework/src/runtime/NEON/functions/NEIntegralImage.cpp",
+ "clframework/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NELaplacianPyramid.cpp",
+ "clframework/src/runtime/NEON/functions/NELaplacianReconstruct.cpp",
+ "clframework/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEMagnitude.cpp",
+ "clframework/src/runtime/NEON/functions/NEMeanStdDev.cpp",
+ "clframework/src/runtime/NEON/functions/NEMedian3x3.cpp",
+ "clframework/src/runtime/NEON/functions/NEMinMaxLocation.cpp",
+ "clframework/src/runtime/NEON/functions/NENonLinearFilter.cpp",
+ "clframework/src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp",
+ "clframework/src/runtime/NEON/functions/NENormalizationLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEOpticalFlow.cpp",
+ "clframework/src/runtime/NEON/functions/NEPermute.cpp",
+ "clframework/src/runtime/NEON/functions/NEPhase.cpp",
+ "clframework/src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp",
+ "clframework/src/runtime/NEON/functions/NEPoolingLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEQuantizationLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEReductionOperation.cpp",
+ "clframework/src/runtime/NEON/functions/NERemap.cpp",
+ "clframework/src/runtime/NEON/functions/NEReshapeLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEROIPoolingLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NEScale.cpp",
+ "clframework/src/runtime/NEON/functions/NEScharr3x3.cpp",
+ "clframework/src/runtime/NEON/functions/NESobel3x3.cpp",
+ "clframework/src/runtime/NEON/functions/NESobel5x5.cpp",
+ "clframework/src/runtime/NEON/functions/NESobel7x7.cpp",
+ "clframework/src/runtime/NEON/functions/NESoftmaxLayer.cpp",
+ "clframework/src/runtime/NEON/functions/NETableLookup.cpp",
+ "clframework/src/runtime/NEON/functions/NEThreshold.cpp",
+ "clframework/src/runtime/NEON/functions/NETranspose.cpp",
+ "clframework/src/runtime/NEON/functions/NEWarpAffine.cpp",
+ "clframework/src/runtime/NEON/functions/NEWarpPerspective.cpp",
+ "clframework/src/runtime/NEON/functions/NEWinogradLayer.cpp",
+ "clframework/src/runtime/NEON/INESimpleFunction.cpp",
+ "clframework/src/runtime/OffsetLifetimeManager.cpp",
+ "clframework/src/runtime/OffsetMemoryPool.cpp",
+ "clframework/src/runtime/OMP/OMPScheduler.cpp",
+ "clframework/src/runtime/PoolManager.cpp",
+ "clframework/src/runtime/Pyramid.cpp",
+ "clframework/src/runtime/Scheduler.cpp",
+ "clframework/src/runtime/SubTensor.cpp",
+ "clframework/src/runtime/TensorAllocator.cpp",
+ "clframework/src/runtime/Tensor.cpp",
+ "clframework/src/runtime/Utils.cpp",
+ "clframework/utils/GraphUtils.cpp",
+ "clframework/utils/Utils.cpp",
+ ],
+ arch: {
+ arm: {
+ srcs: ["clframework/src/core/NEON/kernels/arm32/NEGEMMAArch32Kernel.cpp"],
+ },
+ arm64: {
+ srcs: ["clframework/src/core/NEON/kernels/arm64/NEGEMMAArch64Kernel.cpp",
+ "clframework/src/core/NEON/kernels/arm64/NEGEMMAArch64NativeKernel.cpp",
+ "clframework/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64A53Kernel.cpp",
+ "clframework/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.cpp",
+ "clframework/src/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.cpp",
+ "clframework/src/core/NEON/kernels/arm64/NEGEMVAArch64Kernel.cpp",
+ "clframework/src/core/NEON/kernels/arm64/NEHGEMMAArch64FP16Kernel.cpp"],
+ },
+ },
+ cppflags: [
+ "-std=c++14",
+ "-fexceptions",
+ "-DEMBEDDED_KERNELS",
+ "-DARM_COMPUTE_ASSERTS_ENABLED",
+ "-Wno-unused-parameter",
+ ],
+ rtti: true,
+}
+
+////////////////////////////////////////////
+// //
+// static boost libs //
+// //
+////////////////////////////////////////////
+cc_defaults {
+ name: "libboost-defaults",
+ export_include_dirs: ["boost_1_64_0"],
+ cflags: [
+ "-O3",
+ "-fexceptions",
+ "-Wno-unused-parameter",
+ ],
+ rtti: true,
+}
+
+cc_library_static {
+ name: "libboost_log",
+ defaults: ["libboost-defaults"],
+ srcs: [
+ "boost_1_64_0/libs/log/src/attribute_name.cpp",
+ "boost_1_64_0/libs/log/src/default_sink.cpp",
+ "boost_1_64_0/libs/log/src/global_logger_storage.cpp",
+ "boost_1_64_0/libs/log/src/record_ostream.cpp",
+ "boost_1_64_0/libs/log/src/thread_id.cpp",
+ "boost_1_64_0/libs/log/src/attribute_set.cpp",
+ "boost_1_64_0/libs/log/src/named_scope.cpp",
+ "boost_1_64_0/libs/log/src/severity_level.cpp",
+ "boost_1_64_0/libs/log/src/threadsafe_queue.cpp",
+ "boost_1_64_0/libs/log/src/attribute_value_set.cpp",
+ "boost_1_64_0/libs/log/src/dump.cpp",
+ "boost_1_64_0/libs/log/src/named_scope_format_parser.cpp",
+ "boost_1_64_0/libs/log/src/spirit_encoding.cpp",
+ "boost_1_64_0/libs/log/src/thread_specific.cpp",
+ "boost_1_64_0/libs/log/src/code_conversion.cpp",
+ "boost_1_64_0/libs/log/src/once_block.cpp",
+ "boost_1_64_0/libs/log/src/syslog_backend.cpp",
+ "boost_1_64_0/libs/log/src/timer.cpp",
+ "boost_1_64_0/libs/log/src/core.cpp",
+ "boost_1_64_0/libs/log/src/event.cpp",
+ "boost_1_64_0/libs/log/src/permissions.cpp",
+ "boost_1_64_0/libs/log/src/text_file_backend.cpp",
+ "boost_1_64_0/libs/log/src/timestamp.cpp",
+ "boost_1_64_0/libs/log/src/date_time_format_parser.cpp",
+ "boost_1_64_0/libs/log/src/exceptions.cpp",
+ "boost_1_64_0/libs/log/src/process_id.cpp",
+ "boost_1_64_0/libs/log/src/text_multifile_backend.cpp",
+ "boost_1_64_0/libs/log/src/trivial.cpp",
+ "boost_1_64_0/libs/log/src/default_attribute_names.cpp",
+ "boost_1_64_0/libs/log/src/format_parser.cpp",
+ "boost_1_64_0/libs/log/src/process_name.cpp",
+ "boost_1_64_0/libs/log/src/text_ostream_backend.cpp",
+ "boost_1_64_0/libs/log/src/unhandled_exception_count.cpp",
+ ],
+}
+cc_library_static {
+ name: "libboost_system",
+ defaults: ["libboost-defaults"],
+ srcs: [ "boost_1_64_0/libs/system/src/error_code.cpp", ],
+}
+
+cc_library_static {
+ name: "libboost_thread",
+ defaults: ["libboost-defaults"],
+ srcs: [
+ "boost_1_64_0/libs/thread/src/pthread/thread.cpp",
+ "boost_1_64_0/libs/thread/src/pthread/once_atomic.cpp",
+ "boost_1_64_0/libs/thread/src/pthread/once.cpp",
+ "boost_1_64_0/libs/thread/src/future.cpp",
+ "boost_1_64_0/libs/thread/src/tss_null.cpp",
+ ],
+}
+
+cc_library_static {
+ name: "libboost_unit_test_framework",
+ defaults: ["libboost-defaults"],
+ srcs: [
+ "boost_1_64_0/libs/test/src/compiler_log_formatter.cpp",
+ "boost_1_64_0/libs/test/src/framework.cpp",
+ "boost_1_64_0/libs/test/src/results_reporter.cpp",
+ "boost_1_64_0/libs/test/src/unit_test_main.cpp",
+ "boost_1_64_0/libs/test/src/cpp_main.cpp",
+ "boost_1_64_0/libs/test/src/junit_log_formatter.cpp",
+ "boost_1_64_0/libs/test/src/test_main.cpp",
+ "boost_1_64_0/libs/test/src/unit_test_monitor.cpp",
+ "boost_1_64_0/libs/test/src/debug.cpp",
+ "boost_1_64_0/libs/test/src/plain_report_formatter.cpp",
+ "boost_1_64_0/libs/test/src/test_tools.cpp",
+ "boost_1_64_0/libs/test/src/unit_test_parameters.cpp",
+ "boost_1_64_0/libs/test/src/decorator.cpp",
+ "boost_1_64_0/libs/test/src/progress_monitor.cpp",
+ "boost_1_64_0/libs/test/src/test_tree.cpp",
+ "boost_1_64_0/libs/test/src/xml_log_formatter.cpp",
+ "boost_1_64_0/libs/test/src/execution_monitor.cpp",
+ "boost_1_64_0/libs/test/src/results_collector.cpp",
+ "boost_1_64_0/libs/test/src/unit_test_log.cpp",
+ "boost_1_64_0/libs/test/src/xml_report_formatter.cpp",
+ ],
+}
+
+cc_library_static {
+ name: "libboost_program_options",
+ defaults: ["libboost-defaults"],
+ srcs: [
+ "boost_1_64_0/libs/program_options/src/cmdline.cpp",
+ "boost_1_64_0/libs/program_options/src/config_file.cpp",
+ "boost_1_64_0/libs/program_options/src/convert.cpp",
+ "boost_1_64_0/libs/program_options/src/options_description.cpp",
+ "boost_1_64_0/libs/program_options/src/parsers.cpp",
+ "boost_1_64_0/libs/program_options/src/positional_options.cpp",
+ "boost_1_64_0/libs/program_options/src/split.cpp",
+ "boost_1_64_0/libs/program_options/src/utf8_codecvt_facet.cpp",
+ "boost_1_64_0/libs/program_options/src/value_semantic.cpp",
+ "boost_1_64_0/libs/program_options/src/variables_map.cpp",
+ "boost_1_64_0/libs/program_options/src/winmain.cpp",
+ ],
+}
+
+subdirs = [
+ "armnn",
+]
diff --git a/Android.mk b/Android.mk
new file mode 100644
index 00000000..48868790
--- /dev/null
+++ b/Android.mk
@@ -0,0 +1,128 @@
+#
+# Copyright © 2017 ARM Ltd. All rights reserved.
+# See LICENSE file in the project root for full license information.
+#
+
+ANDROID_NN_DRIVER_LOCAL_PATH := $(call my-dir)
+LOCAL_PATH := $(ANDROID_NN_DRIVER_LOCAL_PATH)
+
+# Configure these paths if you move the source or Khronos headers
+OPENCL_HEADER_PATH := $(LOCAL_PATH)/../mali/product/khronos/original
+NN_HEADER_PATH := $(LOCAL_PATH)/../../../frameworks/ml/nn/runtime/include
+
+###################
+# libarmnn-driver #
+###################
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libarmnn-driver
+LOCAL_MODULE_TAGS := eng optional
+LOCAL_ARM_MODE := arm
+# Mark source files as dependent on Android.mk
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+
+LOCAL_C_INCLUDES := \
+ $(OPENCL_HEADER_PATH) \
+ $(NN_HEADER_PATH)
+
+LOCAL_CFLAGS := \
+ -std=c++14 \
+ -fexceptions \
+ -Werror \
+ -Wno-format-security
+ifeq ($(ARMNN_DRIVER_DEBUG),1)
+ LOCAL_CFLAGS+= -UNDEBUG
+endif
+
+LOCAL_SRC_FILES := \
+ ArmnnDriver.cpp \
+ ArmnnPreparedModel.cpp \
+ ModelToINetworkConverter.cpp \
+ RequestThread.cpp \
+ Utils.cpp
+
+LOCAL_STATIC_LIBRARIES := \
+ libneuralnetworks_common \
+ libarmnn \
+ libboost_log \
+ libboost_program_options \
+ libboost_system \
+ libboost_thread \
+ armnn-arm_compute \
+
+LOCAL_SHARED_LIBRARIES := \
+ libbase \
+ libhidlbase \
+ libhidltransport \
+ libhidlmemory \
+ liblog \
+ libutils \
+ android.hardware.neuralnetworks@1.0 \
+ android.hidl.allocator@1.0 \
+ android.hidl.memory@1.0 \
+ libOpenCL
+
+include $(BUILD_STATIC_LIBRARY)
+
+#####################################################
+# android.hardware.neuralnetworks@1.0-service-armnn #
+#####################################################
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := android.hardware.neuralnetworks@1.0-service-armnn
+LOCAL_INIT_RC := android.hardware.neuralnetworks@1.0-service-armnn.rc
+LOCAL_MODULE_TAGS := eng optional
+LOCAL_ARM_MODE := arm
+LOCAL_MODULE_RELATIVE_PATH := hw
+LOCAL_PROPRIETARY_MODULE := true
+# Mark source files as dependent on Android.mk
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+
+LOCAL_C_INCLUDES := \
+ $(NN_HEADER_PATH)
+
+LOCAL_CFLAGS := \
+ -std=c++14 \
+ -fexceptions
+ifeq ($(ARMNN_DRIVER_DEBUG),1)
+ LOCAL_CFLAGS+= -UNDEBUG
+endif
+
+LOCAL_SRC_FILES := \
+ service.cpp
+
+LOCAL_STATIC_LIBRARIES := \
+ libarmnn-driver \
+ libneuralnetworks_common \
+ libarmnn \
+ libboost_log \
+ libboost_program_options \
+ libboost_system \
+ libboost_thread \
+ armnn-arm_compute
+
+LOCAL_SHARED_LIBRARIES := \
+ libbase \
+ libhidlbase \
+ libhidltransport \
+ libhidlmemory \
+ libdl \
+ libhardware \
+ libtextclassifier \
+ libtextclassifier_hash \
+ liblog \
+ libutils \
+ android.hardware.neuralnetworks@1.0 \
+ android.hidl.allocator@1.0 \
+ android.hidl.memory@1.0 \
+ libOpenCL
+
+include $(BUILD_EXECUTABLE)
+
+##########################
+# armnn module and tests #
+##########################
+# Note we use ANDROID_NN_DRIVER_LOCAL_PATH rather than LOCAL_PATH because LOCAL_PATH will be overwritten
+# when including other .mk files that set it.
+include $(ANDROID_NN_DRIVER_LOCAL_PATH)/armnn/Android.mk
+include $(ANDROID_NN_DRIVER_LOCAL_PATH)/test/Android.mk
\ No newline at end of file
diff --git a/ArmnnDriver.cpp b/ArmnnDriver.cpp
new file mode 100644
index 00000000..914d6560
--- /dev/null
+++ b/ArmnnDriver.cpp
@@ -0,0 +1,429 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#define LOG_TAG "ArmnnDriver"
+
+#include "ArmnnDriver.hpp"
+#include "ArmnnPreparedModel.hpp"
+#include "ModelToINetworkConverter.hpp"
+#include "Utils.hpp"
+
+#include <log/log.h>
+#include "SystemPropertiesUtils.hpp"
+
+#include "OperationsUtils.h"
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/program_options.hpp>
+
+#include <cassert>
+#include <functional>
+#include <string>
+#include <sstream>
+
+using namespace android;
+using namespace std;
+
+namespace
+{
+
+const char *g_Float32PerformanceExecTimeName = "ArmNN.float32Performance.execTime";
+const char *g_Float32PerformancePowerUsageName = "ArmNN.float32Performance.powerUsage";
+const char *g_Quantized8PerformanceExecTimeName = "ArmNN.quantized8Performance.execTime";
+const char *g_Quantized8PerformancePowerUsageName = "ArmNN.quantized8Performance.powerUsage";
+
+}; //namespace
+
+namespace armnn_driver
+{
+
+DriverOptions::DriverOptions(armnn::Compute computeDevice)
+: m_ComputeDevice(computeDevice)
+, m_VerboseLogging(false)
+, m_UseAndroidNnCpuExecutor(false)
+{
+}
+
+DriverOptions::DriverOptions(int argc, char** argv)
+: m_ComputeDevice(armnn::Compute::GpuAcc)
+, m_VerboseLogging(false)
+, m_UseAndroidNnCpuExecutor(false)
+, m_ClTunedParametersMode(armnn::IClTunedParameters::Mode::UseTunedParameters)
+{
+ namespace po = boost::program_options;
+
+ std::string computeDeviceAsString;
+ std::string unsupportedOperationsAsString;
+ std::string clTunedParametersModeAsString;
+
+ po::options_description optionsDesc("Options");
+ optionsDesc.add_options()
+ ("compute,c",
+ po::value<std::string>(&computeDeviceAsString)->default_value("GpuAcc"),
+ "Which device to run layers on by default. Possible values are: CpuRef, CpuAcc, GpuAcc")
+
+ ("verbose-logging,v",
+ po::bool_switch(&m_VerboseLogging),
+ "Turns verbose logging on")
+
+ ("use-androidnn-cpu-executor,e",
+ po::bool_switch(&m_UseAndroidNnCpuExecutor),
+ "Forces the driver to satisfy requests via the Android-provided CpuExecutor")
+
+ ("request-inputs-and-outputs-dump-dir,d",
+ po::value<std::string>(&m_RequestInputsAndOutputsDumpDir)->default_value(""),
+ "If non-empty, the directory where request inputs and outputs should be dumped")
+
+ ("unsupported-operations,u",
+ po::value<std::string>(&unsupportedOperationsAsString)->default_value(""),
+ "If non-empty, a comma-separated list of operation indices which the driver will forcibly "
+ "consider unsupported")
+
+ ("cl-tuned-parameters-file,t",
+ po::value<std::string>(&m_ClTunedParametersFile)->default_value(""),
+ "If non-empty, the given file will be used to load/save CL tuned parameters. "
+ "See also --cl-tuned-parameters-mode")
+
+ ("cl-tuned-parameters-mode,m",
+ po::value<std::string>(&clTunedParametersModeAsString)->default_value("UseTunedParameters"),
+ "If 'UseTunedParameters' (the default), will read CL tuned parameters from the file specified by "
+ "--cl-tuned-parameters-file. "
+ "If 'UpdateTunedParameters', will also find the optimum parameters when preparing new networks and update "
+ "the file accordingly.");
+
+
+ po::variables_map variablesMap;
+ try
+ {
+ po::store(po::parse_command_line(argc, argv, optionsDesc), variablesMap);
+ po::notify(variablesMap);
+ }
+ catch (const po::error& e)
+ {
+ ALOGW("An error occurred attempting to parse program options: %s", e.what());
+ }
+
+ if (computeDeviceAsString == "CpuRef")
+ {
+ m_ComputeDevice = armnn::Compute::CpuRef;
+ }
+ else if (computeDeviceAsString == "GpuAcc")
+ {
+ m_ComputeDevice = armnn::Compute::GpuAcc;
+ }
+ else if (computeDeviceAsString == "CpuAcc")
+ {
+ m_ComputeDevice = armnn::Compute::CpuAcc;
+ }
+ else
+ {
+ ALOGW("Requested unknown compute device %s. Defaulting to compute id %s",
+ computeDeviceAsString.c_str(), GetComputeDeviceAsCString(m_ComputeDevice));
+ }
+
+ if (!unsupportedOperationsAsString.empty())
+ {
+ std::istringstream argStream(unsupportedOperationsAsString);
+
+ std::string s;
+ while (!argStream.eof())
+ {
+ std::getline(argStream, s, ',');
+ try
+ {
+ unsigned int operationIdx = std::stoi(s);
+ m_ForcedUnsupportedOperations.insert(operationIdx);
+ }
+ catch (const std::invalid_argument&)
+ {
+ ALOGW("Ignoring invalid integer argument in -u/--unsupported-operations value: %s", s.c_str());
+ }
+ }
+ }
+
+ if (!m_ClTunedParametersFile.empty())
+ {
+ // The mode is only relevant if the file path has been provided
+ if (clTunedParametersModeAsString == "UseTunedParameters")
+ {
+ m_ClTunedParametersMode = armnn::IClTunedParameters::Mode::UseTunedParameters;
+ }
+ else if (clTunedParametersModeAsString == "UpdateTunedParameters")
+ {
+ m_ClTunedParametersMode = armnn::IClTunedParameters::Mode::UpdateTunedParameters;
+ }
+ else
+ {
+ ALOGW("Requested unknown cl-tuned-parameters-mode '%s'. Defaulting to UseTunedParameters",
+ clTunedParametersModeAsString.c_str());
+ }
+ }
+}
+
+ArmnnDriver::ArmnnDriver(DriverOptions options)
+ : m_Runtime(nullptr, nullptr)
+ , m_ClTunedParameters(nullptr, nullptr)
+ , m_Options(std::move(options))
+{
+ ALOGV("ArmnnDriver::ArmnnDriver()");
+
+ armnn::ConfigureLogging(false, m_Options.IsVerboseLoggingEnabled(), armnn::LogSeverity::Trace);
+ if (m_Options.IsVerboseLoggingEnabled())
+ {
+ SetMinimumLogSeverity(base::VERBOSE);
+ }
+ else
+ {
+ SetMinimumLogSeverity(base::INFO);
+ }
+
+ try
+ {
+ armnn::IRuntime::CreationOptions options(m_Options.GetComputeDevice());
+ options.m_UseCpuRefAsFallback = false;
+ if (!m_Options.GetClTunedParametersFile().empty())
+ {
+ m_ClTunedParameters = armnn::IClTunedParameters::Create(m_Options.GetClTunedParametersMode());
+ try
+ {
+ m_ClTunedParameters->Load(m_Options.GetClTunedParametersFile().c_str());
+ }
+ catch (const armnn::Exception& error)
+ {
+ // This is only a warning because the file won't exist the first time you are generating it.
+ ALOGW("ArmnnDriver: Failed to load CL tuned parameters file '%s': %s",
+ m_Options.GetClTunedParametersFile().c_str(), error.what());
+ }
+ options.m_ClTunedParameters = m_ClTunedParameters.get();
+ }
+ m_Runtime = armnn::IRuntime::Create(options);
+ }
+ catch (const armnn::ClRuntimeUnavailableException& error)
+ {
+ ALOGE("ArmnnDriver: Failed to setup CL runtime: %s. Device will be unavailable.", error.what());
+ }
+}
+
+Return<void> ArmnnDriver::getCapabilities(getCapabilities_cb cb)
+{
+ ALOGV("ArmnnDriver::getCapabilities()");
+
+ Capabilities capabilities;
+ if (m_Runtime)
+ {
+ capabilities.float32Performance.execTime =
+ ParseSystemProperty(g_Float32PerformanceExecTimeName, 1.0f);
+
+ capabilities.float32Performance.powerUsage =
+ ParseSystemProperty(g_Float32PerformancePowerUsageName, 1.0f);
+
+ capabilities.quantized8Performance.execTime =
+ ParseSystemProperty(g_Quantized8PerformanceExecTimeName, 1.0f);
+
+ capabilities.quantized8Performance.powerUsage =
+ ParseSystemProperty(g_Quantized8PerformancePowerUsageName, 1.0f);
+
+ cb(ErrorStatus::NONE, capabilities);
+ }
+ else
+ {
+ capabilities.float32Performance.execTime = 0;
+ capabilities.float32Performance.powerUsage = 0;
+ capabilities.quantized8Performance.execTime = 0;
+ capabilities.quantized8Performance.powerUsage = 0;
+
+ cb(ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
+ }
+
+ return Void();
+}
+
+Return<void> ArmnnDriver::getSupportedOperations(const Model& model, getSupportedOperations_cb cb)
+{
+ ALOGV("ArmnnDriver::getSupportedOperations()");
+
+ std::vector<bool> result;
+
+ if (!m_Runtime)
+ {
+ cb(ErrorStatus::DEVICE_UNAVAILABLE, result);
+ return Void();
+ }
+
+ // Run general model validation, if this doesn't pass we shouldn't analyse the model anyway
+ if (!android::nn::validateModel(model))
+ {
+ cb(ErrorStatus::INVALID_ARGUMENT, result);
+ return Void();
+ }
+
+ // Attempt to convert the model to an ArmNN input network (INetwork).
+ ModelToINetworkConverter modelConverter(m_Runtime->GetDeviceSpec().DefaultComputeDevice, model,
+ m_Options.GetForcedUnsupportedOperations());
+
+ if (modelConverter.GetConversionResult() != ConversionResult::Success
+ && modelConverter.GetConversionResult() != ConversionResult::UnsupportedFeature)
+ {
+ cb(ErrorStatus::GENERAL_FAILURE, result);
+ return Void();
+ }
+
+ // Check each operation if it was converted successfully and copy the flags
+ // into the result (vector<bool>) that we need to return to Android
+ result.reserve(model.operations.size());
+ for (uint32_t operationIdx = 0; operationIdx < model.operations.size(); operationIdx++)
+ {
+ bool operationSupported = modelConverter.IsOperationSupported(operationIdx);
+ result.push_back(operationSupported);
+ }
+
+ cb(ErrorStatus::NONE, result);
+ return Void();
+}
+
+namespace
+{
+
+void NotifyCallbackAndCheck(const sp<IPreparedModelCallback>& callback, ErrorStatus errorStatus,
+ const ::android::sp<IPreparedModel>& preparedModelPtr)
+{
+ Return<void> returned = callback->notify(errorStatus, preparedModelPtr);
+ // This check is required, if the callback fails and it isn't checked it will bring down the service
+ if (!returned.isOk())
+ {
+ ALOGE("ArmnnDriver::prepareModel: hidl callback failed to return properly: %s ",
+ returned.description().c_str());
+ }
+}
+
+Return<ErrorStatus> FailPrepareModel(ErrorStatus error,
+ const std::string& message,
+ const sp<IPreparedModelCallback>& callback)
+{
+ ALOGW("ArmnnDriver::prepareModel: %s", message.c_str());
+ NotifyCallbackAndCheck(callback, error, nullptr);
+ return error;
+}
+
+}
+
+Return<ErrorStatus> ArmnnDriver::prepareModel(const Model& model,
+ const sp<IPreparedModelCallback>& cb)
+{
+ ALOGV("ArmnnDriver::prepareModel()");
+
+ if (cb.get() == nullptr)
+ {
+ ALOGW("ArmnnDriver::prepareModel: Invalid callback passed to prepareModel");
+ return ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ if (!m_Runtime)
+ {
+ return FailPrepareModel(ErrorStatus::DEVICE_UNAVAILABLE, "ArmnnDriver::prepareModel: Device unavailable", cb);
+ }
+
+ if (!android::nn::validateModel(model))
+ {
+ return FailPrepareModel(ErrorStatus::INVALID_ARGUMENT,
+ "ArmnnDriver::prepareModel: Invalid model passed as input", cb);
+ }
+
+ if (m_Options.UseAndroidNnCpuExecutor())
+ {
+ sp<AndroidNnCpuExecutorPreparedModel> preparedModel = new AndroidNnCpuExecutorPreparedModel(model,
+ m_Options.GetRequestInputsAndOutputsDumpDir());
+ if (preparedModel->Initialize())
+ {
+ NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel);
+ return ErrorStatus::NONE;
+ }
+ else
+ {
+ NotifyCallbackAndCheck(cb, ErrorStatus::INVALID_ARGUMENT, preparedModel);
+ return ErrorStatus::INVALID_ARGUMENT;
+ }
+ }
+
+ // Deliberately ignore any unsupported operations requested by the options -
+ // at this point we're being asked to prepare a model that we've already declared support for
+ // and the operation indices may be different to those in getSupportedOperations anyway.
+ std::set<unsigned int> unsupportedOperations;
+ ModelToINetworkConverter modelConverter(m_Runtime->GetDeviceSpec().DefaultComputeDevice, model,
+ unsupportedOperations);
+
+ if (modelConverter.GetConversionResult() != ConversionResult::Success)
+ {
+ return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
+ }
+
+ // optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ try
+ {
+ optNet = armnn::Optimize(*modelConverter.GetINetwork(), m_Runtime->GetDeviceSpec());
+ }
+ catch (armnn::Exception& e)
+ {
+ std::stringstream message;
+ message << "armnn::Exception ("<<e.what()<<") caught from optimize.";
+ return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ }
+
+ // load it into the runtime
+ armnn::NetworkId netId = 0;
+ try
+ {
+ if (m_Runtime->LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success)
+ {
+ return FailPrepareModel(ErrorStatus::GENERAL_FAILURE,
+ "ArmnnDriver::prepareModel: Network could not be loaded", cb);
+ }
+ }
+ catch (armnn::Exception& e)
+ {
+ std::stringstream message;
+ message << "armnn::Exception (" << e.what()<< ") caught from LoadNetwork.";
+ return FailPrepareModel(ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ }
+
+ std::unique_ptr<ArmnnPreparedModel> preparedModel(new ArmnnPreparedModel(
+ netId,
+ m_Runtime.get(),
+ model,
+ m_Options.GetRequestInputsAndOutputsDumpDir()
+ ));
+
+ // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
+ // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
+ preparedModel->ExecuteWithDummyInputs();
+
+ if (m_ClTunedParameters &&
+ m_Options.GetClTunedParametersMode() == armnn::IClTunedParameters::Mode::UpdateTunedParameters)
+ {
+ // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
+ try
+ {
+ m_ClTunedParameters->Save(m_Options.GetClTunedParametersFile().c_str());
+ }
+ catch (const armnn::Exception& error)
+ {
+ ALOGE("ArmnnDriver: Failed to save CL tuned parameters file '%s': %s",
+ m_Options.GetClTunedParametersFile().c_str(), error.what());
+ }
+ }
+
+ NotifyCallbackAndCheck(cb, ErrorStatus::NONE, preparedModel.release());
+
+ return ErrorStatus::NONE;
+}
+
+Return<DeviceStatus> ArmnnDriver::getStatus()
+{
+ ALOGV("ArmnnDriver::getStatus()");
+ return DeviceStatus::AVAILABLE;
+}
+
+}
diff --git a/ArmnnDriver.hpp b/ArmnnDriver.hpp
new file mode 100644
index 00000000..8b66e774
--- /dev/null
+++ b/ArmnnDriver.hpp
@@ -0,0 +1,61 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "HalInterfaces.h"
+#include "NeuralNetworks.h"
+#include <armnn/ArmNN.hpp>
+
+#include <memory>
+#include <set>
+#include <string>
+
+namespace armnn_driver
+{
+
+class DriverOptions
+{
+public:
+ DriverOptions(armnn::Compute computeDevice);
+ DriverOptions(int argc, char** argv);
+ DriverOptions(DriverOptions&& other) = default;
+
+ armnn::Compute GetComputeDevice() const { return m_ComputeDevice; }
+ bool IsVerboseLoggingEnabled() const { return m_VerboseLogging; }
+ const std::string& GetRequestInputsAndOutputsDumpDir() const { return m_RequestInputsAndOutputsDumpDir; }
+ bool UseAndroidNnCpuExecutor() const { return m_UseAndroidNnCpuExecutor; }
+ const std::set<unsigned int>& GetForcedUnsupportedOperations() const { return m_ForcedUnsupportedOperations; }
+ const std::string& GetClTunedParametersFile() const { return m_ClTunedParametersFile; }
+ armnn::IClTunedParameters::Mode GetClTunedParametersMode() const { return m_ClTunedParametersMode; }
+
+private:
+ armnn::Compute m_ComputeDevice;
+ bool m_VerboseLogging;
+ bool m_UseAndroidNnCpuExecutor;
+ std::string m_RequestInputsAndOutputsDumpDir;
+ std::set<unsigned int> m_ForcedUnsupportedOperations;
+ std::string m_ClTunedParametersFile;
+ armnn::IClTunedParameters::Mode m_ClTunedParametersMode;
+};
+
+class ArmnnDriver : public IDevice {
+public:
+ ArmnnDriver(DriverOptions options);
+ virtual ~ArmnnDriver() {}
+ virtual Return<void> getCapabilities(getCapabilities_cb _hidl_cb) override;
+ virtual Return<void> getSupportedOperations(const Model &model,
+ getSupportedOperations_cb _hidl_cb) override;
+ virtual Return<ErrorStatus> prepareModel(const Model &model,
+ const android::sp<IPreparedModelCallback>& callback);
+ virtual Return<DeviceStatus> getStatus() override;
+
+private:
+ armnn::IRuntimePtr m_Runtime;
+ armnn::IClTunedParametersPtr m_ClTunedParameters;
+ DriverOptions m_Options;
+};
+
+}
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
new file mode 100644
index 00000000..1bd72199
--- /dev/null
+++ b/ArmnnPreparedModel.cpp
@@ -0,0 +1,353 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#define LOG_TAG "ArmnnDriver"
+
+#include "ArmnnPreparedModel.hpp"
+#include "Utils.hpp"
+
+#include <boost/format.hpp>
+#include <log/log.h>
+#include <OperationsUtils.h>
+
+#include <cassert>
+#include <cinttypes>
+
+using namespace android;
+
+namespace
+{
+using namespace armnn_driver;
+
+void NotifyCallbackAndCheck(const ::android::sp<IExecutionCallback>& callback, ErrorStatus errorStatus,
+ std::string callingFunction)
+{
+ Return<void> returned = callback->notify(errorStatus);
+ // This check is required, if the callback fails and it isn't checked it will bring down the service
+ if (!returned.isOk())
+ {
+ ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s",
+ callingFunction.c_str(), returned.description().c_str());
+ }
+}
+
+bool ValidateRequestArgument(const RequestArgument& requestArg, const armnn::TensorInfo& tensorInfo)
+{
+ if (requestArg.dimensions.size() != 0)
+ {
+ if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions())
+ {
+ ALOGE("Mismatched dimensions (request argument: %zu, expected: %u)",
+ requestArg.dimensions.size(), tensorInfo.GetNumDimensions());
+ return false;
+ }
+
+ for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d)
+ {
+ if (requestArg.dimensions[d] != tensorInfo.GetShape()[d])
+ {
+ ALOGE("Mismatched size for dimension %d (request argument: %u, expected %u)",
+ d, requestArg.dimensions[d], tensorInfo.GetShape()[d]);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+armnn::Tensor GetTensorForRequestArgument(const RequestArgument& requestArg,
+ const armnn::TensorInfo& tensorInfo,
+ const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
+{
+ if (!ValidateRequestArgument(requestArg, tensorInfo))
+ {
+ return armnn::Tensor();
+ }
+
+ return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools));
+}
+
+inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
+{
+ return tensorNamePrefix + std::to_string(index);
+}
+
+}
+
+namespace armnn_driver
+{
+
+RequestThread ArmnnPreparedModel::m_RequestThread;
+
+template <typename TensorBindingCollection>
+void ArmnnPreparedModel::DumpTensorsIfRequired(char const* tensorNamePrefix,
+ const TensorBindingCollection& tensorBindings)
+{
+ if (!m_RequestInputsAndOutputsDumpDir.empty())
+ {
+ const std::string requestName = boost::str(boost::format("%1%_%2%.dump") % m_NetworkId % m_RequestCount);
+ for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
+ {
+ DumpTensor(m_RequestInputsAndOutputsDumpDir,
+ requestName,
+ BuildTensorName(tensorNamePrefix, i),
+ tensorBindings[i].second);
+ }
+ }
+}
+
+ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const Model& model,
+ const std::string& requestInputsAndOutputsDumpDir)
+: m_NetworkId(networkId)
+, m_Runtime(runtime)
+, m_Model(model)
+, m_RequestCount(0)
+, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+{
+}
+
+ArmnnPreparedModel::~ArmnnPreparedModel()
+{
+ //unload the network associated with this model
+ m_Runtime->UnloadNetwork(m_NetworkId);
+}
+
+Return<ErrorStatus> ArmnnPreparedModel::execute(const Request& request,
+ const ::android::sp<IExecutionCallback>& callback)
+{
+ ALOGV("ArmnnPreparedModel::execute(): %s", GetModelSummary(m_Model).c_str());
+ m_RequestCount++;
+
+    if (callback.get() == nullptr)
+    {
+        ALOGE("ArmnnPreparedModel::execute invalid callback passed");
+        return ErrorStatus::INVALID_ARGUMENT;
+    }
+
+ if (!android::nn::validateRequest(request, m_Model))
+ {
+ NotifyCallbackAndCheck(callback, ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel::execute");
+ return ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ if (!m_RequestInputsAndOutputsDumpDir.empty())
+ {
+ ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get()));
+ }
+
+ // allocate the tensors on the heap, as they are passed to the request thread
+ auto pInputTensors = std::make_shared<armnn::InputTensors>();
+ auto pOutputTensors = std::make_shared<armnn::OutputTensors>();
+
+ // map the memory pool into shared pointers
+ // use a shared memory pools vector on the heap, as it is passed to the request thread
+ auto pMemPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
+ if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
+ {
+ NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // add the inputs and outputs with their data
+ try
+ {
+ pInputTensors->reserve(request.inputs.size());
+ for (unsigned int i = 0; i < request.inputs.size(); i++)
+ {
+ const auto& inputArg = request.inputs[i];
+
+ const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);
+ if (inputTensor.GetMemoryArea() == nullptr)
+ {
+ ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+
+ pInputTensors->emplace_back(i, inputTensor);
+ }
+
+ pOutputTensors->reserve(request.outputs.size());
+ for (unsigned int i = 0; i < request.outputs.size(); i++)
+ {
+ const auto& outputArg = request.outputs[i];
+
+ const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
+ const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
+ if (outputTensor.GetMemoryArea() == nullptr)
+ {
+ ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+
+ pOutputTensors->emplace_back(i, outputTensor);
+ }
+ }
+ catch (armnn::Exception& e)
+ {
+ ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what());
+ NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+
+ ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
+ // post the request for asynchronous execution
+ m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, callback);
+ ALOGV("ArmnnPreparedModel::execute(...) after PostMsg");
+
+ return ErrorStatus::NONE; // successfully queued
+}
+
+void ArmnnPreparedModel::ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& pInputTensors,
+ std::shared_ptr<armnn::OutputTensors>& pOutputTensors,
+ const ::android::sp<IExecutionCallback>& callback)
+{
+ ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
+
+ DumpTensorsIfRequired("Input", *pInputTensors);
+
+ // run it
+ try
+ {
+ m_Runtime->EnqueueWorkload(m_NetworkId, *pInputTensors, *pOutputTensors);
+ }
+ catch (armnn::Exception& e)
+ {
+ ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
+ NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
+ return;
+ }
+
+ DumpTensorsIfRequired("Output", *pOutputTensors);
+
+ // Commit output buffers.
+ // Note that we update *all* pools, even if they aren't actually used as outputs -
+ // this is simpler and is what the CpuExecutor does.
+ for (android::nn::RunTimePoolInfo& pool : *pMemPools)
+ {
+ pool.update();
+ }
+
+ NotifyCallbackAndCheck(callback, ErrorStatus::NONE, "ExecuteGraph");
+}
+
+void ArmnnPreparedModel::ExecuteWithDummyInputs()
+{
+ std::vector<std::vector<char>> storage;
+ armnn::InputTensors inputTensors;
+ for (unsigned int i = 0; i < m_Model.inputIndexes.size(); i++)
+ {
+ const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ storage.emplace_back(inputTensorInfo.GetNumBytes());
+ const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
+
+ inputTensors.emplace_back(i, inputTensor);
+ }
+
+ armnn::OutputTensors outputTensors;
+ for (unsigned int i = 0; i < m_Model.outputIndexes.size(); i++)
+ {
+ const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
+ storage.emplace_back(outputTensorInfo.GetNumBytes());
+ const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());
+
+ outputTensors.emplace_back(i, outputTensor);
+ }
+
+ try
+ {
+ m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ }
+ catch (armnn::Exception& e)
+ {
+ ALOGW("ExecuteWithDummyInputs: armnn::Exception caught from EnqueueWorkload: %s", e.what());
+ }
+}
+
+AndroidNnCpuExecutorPreparedModel::AndroidNnCpuExecutorPreparedModel(const Model& model,
+ const std::string& requestInputsAndOutputsDumpDir)
+: m_Model(model)
+, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+, m_RequestCount(0)
+{
+}
+
+bool AndroidNnCpuExecutorPreparedModel::Initialize()
+{
+ return setRunTimePoolInfosFromHidlMemories(&m_ModelPoolInfos, m_Model.pools);
+}
+
+Return<ErrorStatus> AndroidNnCpuExecutorPreparedModel::execute(const Request& request,
+ const ::android::sp<IExecutionCallback>& callback)
+{
+ m_RequestCount++;
+ std::vector<android::nn::RunTimePoolInfo> requestPoolInfos;
+
+ if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools))
+ {
+ NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "AndroidNnCpuExecutorPreparedModel::execute");
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (!m_RequestInputsAndOutputsDumpDir.empty())
+ {
+ ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get()));
+ }
+
+ DumpTensorsIfRequired(
+ "Input",
+ m_Model.inputIndexes,
+ request.inputs,
+ requestPoolInfos);
+
+ android::nn::CpuExecutor executor;
+ const int n = executor.run(m_Model, request, m_ModelPoolInfos, requestPoolInfos);
+ ErrorStatus executionStatus =
+ n == ANEURALNETWORKS_NO_ERROR ? ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE;
+
+ DumpTensorsIfRequired(
+ "Output",
+ m_Model.outputIndexes,
+ request.outputs,
+ requestPoolInfos);
+
+    NotifyCallbackAndCheck(callback, executionStatus, "AndroidNnCpuExecutorPreparedModel::execute");
+ return executionStatus;
+}
+
+void AndroidNnCpuExecutorPreparedModel::DumpTensorsIfRequired(
+ char const* tensorNamePrefix,
+ const hidl_vec<uint32_t>& operandIndices,
+ const hidl_vec<RequestArgument>& requestArgs,
+ const std::vector<android::nn::RunTimePoolInfo>& requestPoolInfos)
+{
+ if (m_RequestInputsAndOutputsDumpDir.empty())
+ {
+ return;
+ }
+
+ for (std::size_t i = 0; i < requestArgs.size(); ++i)
+ {
+ const Operand& operand = m_Model.operands[operandIndices[i]];
+ const armnn::TensorInfo tensorInfo = GetTensorInfoForOperand(operand);
+ const armnn::Tensor tensor = GetTensorForRequestArgument(requestArgs[i], tensorInfo, requestPoolInfos);
+ const std::string tensorName = BuildTensorName(tensorNamePrefix, i);
+ if (tensor.GetMemoryArea() != nullptr)
+ {
+ std::string requestName = boost::str(boost::format("%1%_%2%.dump") % this % m_RequestCount);
+ DumpTensor(m_RequestInputsAndOutputsDumpDir, requestName, tensorName, tensor);
+ }
+ else
+ {
+ ALOGE("Cannot dump tensor %s. An error occurred converting the associated request argument to a tensor.",
+ tensorName.c_str());
+ }
+ }
+}
+
+}
diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp
new file mode 100644
index 00000000..f61d56ce
--- /dev/null
+++ b/ArmnnPreparedModel.hpp
@@ -0,0 +1,83 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "RequestThread.hpp"
+
+#include "HalInterfaces.h"
+#include "NeuralNetworks.h"
+#include <armnn/ArmNN.hpp>
+
+#include <string>
+#include <vector>
+
+namespace armnn_driver
+{
+
+class ArmnnPreparedModel : public IPreparedModel
+{
+public:
+ ArmnnPreparedModel(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const Model& model,
+ const std::string& requestInputsAndOutputsDumpDir);
+
+ virtual ~ArmnnPreparedModel();
+
+ virtual Return<ErrorStatus> execute(const Request& request,
+ const ::android::sp<IExecutionCallback>& callback) override;
+
+ /// execute the graph prepared from the request
+ void ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& pInputTensors,
+ std::shared_ptr<armnn::OutputTensors>& pOutputTensors,
+ const ::android::sp<IExecutionCallback>& callback);
+
+ /// Executes this model with dummy inputs (e.g. all zeroes).
+ void ExecuteWithDummyInputs();
+
+private:
+
+ template <typename TensorBindingCollection>
+ void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);
+
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ Model m_Model;
+ // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
+ // It is specific to this class, so it is declared as static here
+ static RequestThread m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+};
+
+class AndroidNnCpuExecutorPreparedModel : public IPreparedModel
+{
+public:
+
+ AndroidNnCpuExecutorPreparedModel(const Model& model, const std::string& requestInputsAndOutputsDumpDir);
+ virtual ~AndroidNnCpuExecutorPreparedModel() { }
+
+ bool Initialize();
+
+ virtual Return<ErrorStatus> execute(const Request& request,
+ const ::android::sp<IExecutionCallback>& callback) override;
+
+private:
+
+ void DumpTensorsIfRequired(
+ char const* tensorNamePrefix,
+ const hidl_vec<uint32_t>& operandIndices,
+ const hidl_vec<RequestArgument>& requestArgs,
+ const std::vector<android::nn::RunTimePoolInfo>& requestPoolInfos);
+
+ Model m_Model;
+ std::vector<android::nn::RunTimePoolInfo> m_ModelPoolInfos;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ uint32_t m_RequestCount;
+};
+
+}
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..18e83ec1
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+Copyright (c) 2017 ARM Limited.
+
+SPDX-License-Identifier: MIT
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/ModelToINetworkConverter.cpp b/ModelToINetworkConverter.cpp
new file mode 100644
index 00000000..68ebef00
--- /dev/null
+++ b/ModelToINetworkConverter.cpp
@@ -0,0 +1,1848 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#define LOG_TAG "ArmnnDriver"
+
+#include "ModelToINetworkConverter.hpp"
+#include "OperationsUtils.h"
+
+#include <armnn/LayerSupport.hpp>
+#include <Permute.hpp>
+
+#include <log/log.h>
+#include <cassert>
+
+#include <boost/format.hpp>
+#include <boost/core/ignore_unused.hpp>
+#include <boost/test/tools/floating_point_comparison.hpp>
+#include <boost/cast.hpp>
+
+namespace
+{
+using namespace armnn_driver;
+using namespace android::nn;
+
+// Convenience function to log the reason for failing to convert a model.
+// @return Always returns false (so that it can be used by callers as a quick way to signal an error and return)
+template<class... Args>
+static bool Fail(const char* formatStr, Args&&... args)
+{
+ ALOGD(formatStr, std::forward<Args>(args)...);
+ return false;
+}
+
+// Convenience function to call an Is*Supported function and log caller name together with reason for lack of support.
+// Called as: IsLayerSupported(__func__, Is*Supported, a, b, c, d, e)
+template<typename IsLayerSupportedFunc, typename ... Args>
+bool IsLayerSupported(const char* funcName, IsLayerSupportedFunc f, Args&&... args)
+{
+ std::vector<char> unsupportedReason(1024+1);
+ bool isSupported = f(std::forward<Args>(args)..., unsupportedReason.data(), unsupportedReason.size()-1);
+    if (isSupported)
+ {
+ return true;
+ }
+ else
+ {
+ std::string sUnsupportedReason(unsupportedReason.data());
+ if (sUnsupportedReason.size() > 0)
+ {
+ ALOGD("%s: not supported by armnn: %s", funcName, sUnsupportedReason.c_str());
+        }
+        else
+        {
+ ALOGD("%s: not supported by armnn", funcName);
+ }
+ return false;
+ }
+}
+
+armnn::TensorShape GetTensorShapeForOperand(const Operand& operand)
+{
+ return armnn::TensorShape(operand.dimensions.size(), operand.dimensions.data());
+}
+
+inline bool IsOperandTypeSupportedForTensors(OperandType type)
+{
+ return type == OperandType::TENSOR_FLOAT32 ||
+ type == OperandType::TENSOR_QUANT8_ASYMM ||
+ type == OperandType::TENSOR_INT32;
+}
+
+void CalcPadding(uint32_t input, uint32_t kernel, uint32_t stride, uint32_t& outPadHead, uint32_t& outPadTail,
+ android::nn::PaddingScheme scheme)
+{
+ int32_t padHead;
+ int32_t padTail;
+ calculateExplicitPadding(input, stride, kernel, scheme, &padHead, &padTail);
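+    // Illustrative example, assuming the usual AndroidNN "SAME" padding convention: an input of 224 with a
+    // kernel of 3 and a stride of 2 needs one element of padding in total, split as 0 at the head and 1 at the tail.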
+ outPadHead = boost::numeric_cast<uint32_t>(padHead);
+ outPadTail = boost::numeric_cast<uint32_t>(padTail);
+}
+
+bool ValidateBroadcast(const Model& model, const Operation& operation, uint32_t numInputs)
+{
+ assert(operation.inputs.size() > 0); // This should have been validated by the caller
+ // validateModel() has been called already so we know the operation.inputs indexes are valid within model.operands.
+ const Operand& firstInput = model.operands[operation.inputs[0]];
+
+ // We don't support broadcasting yet - we require all input operands to have the same shape
+ for (uint32_t i = 1; i < numInputs; ++i)
+ {
+ const Operand& otherInput = model.operands[operation.inputs[i]];
+
+ if (firstInput.dimensions.size() != otherInput.dimensions.size())
+ {
+ return Fail("%s: Broadcasting not supported (Input 0 dims: %i Input %i dims: %i)",
+ __func__, firstInput.dimensions.size(), i, otherInput.dimensions.size());
+ }
+
+ for (unsigned int d = 0; d < firstInput.dimensions.size(); ++d)
+ {
+ if (firstInput.dimensions[d] != otherInput.dimensions[d])
+ {
+ return Fail("%s: Broadcasting not supported (Dimension %i size mismatch. "
+ "Input 0: %i Input %i: %i)",
+ __func__, d, firstInput.dimensions[d], i, otherInput.dimensions[d]);
+ }
+ }
+ }
+
+ return true;
+}
+
+Shape GetOperandShape(const Operand& operand)
+{
+ Shape shape;
+ shape.type = operand.type;
+ shape.dimensions = operand.dimensions;
+ shape.scale = operand.scale;
+ shape.offset = operand.zeroPoint;
+ return shape;
+}
+
+// ArmNN requires the bias scale to be equal to the product of the weight and input scales, which is also
+// what AndroidNN requires. However for some of the AndroidNN tests the values don't exactly match so
+// we accept some tolerance. We don't want ArmNN itself to accept these inconsistencies as it is up to the user
+// (us, in this case) to ensure they match.
+void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo,
+ const armnn::TensorInfo& weightInfo, const armnn::TensorInfo& inputInfo)
+{
+ const float expectedBiasScale = weightInfo.GetQuantizationScale() * inputInfo.GetQuantizationScale();
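+    // For example, an input scale of 0.5f and a weight scale of 0.25f give an expected bias scale of 0.125f;
+    // a bias scale of 0.1251f falls inside the 1% tolerance below and is corrected to 0.125f.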
+ if (biasInfo.GetQuantizationScale() != expectedBiasScale)
+ {
+ boost::math::fpc::close_at_tolerance<float> comparer(boost::math::fpc::percent_tolerance(1.0f));
+ if (comparer(biasInfo.GetQuantizationScale(), expectedBiasScale))
+ {
+ ALOGW("Bias quantization scale has been modified to match input*weights");
+ biasInfo.SetQuantizationScale(expectedBiasScale);
+ }
+ }
+}
+
+const armnn::PermutationVector NHWCToArmNN({ 0U, 2U, 3U, 1U });
+
+template <typename OSlot>
+armnn::IConnectableLayer& AddPermuteLayer(armnn::INetwork& network, OSlot& input,
+ const armnn::PermutationVector& mappings)
+{
+ // Add swizzle layer
+ armnn::IConnectableLayer* const layer = network.AddPermuteLayer(mappings);
+
+ assert(layer != nullptr);
+
+    // Connect input to swizzle layer
+ input.Connect(layer->GetInputSlot(0));
+
+ // Setup swizzled output
+ const armnn::TensorInfo outInfo = armnnUtils::Permuted(input.GetTensorInfo(), mappings);
+ layer->GetOutputSlot(0).SetTensorInfo(outInfo);
+
+ return *layer;
+}
+
+armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, LayerInputHandle& input,
+ armnn::IConnectableLayer& firstLayer,
+ armnn::IConnectableLayer& lastLayer)
+{
+ static const armnn::PermutationVector ArmNNToNHWC({ 0U, 3U, 1U, 2U });
+
+ // Add swizzle layer
+ armnn::IConnectableLayer& swizzleLayer = AddPermuteLayer(network, input, NHWCToArmNN);
+
+ // Connect swizzled input to layer
+ swizzleLayer.GetOutputSlot(0).Connect(firstLayer.GetInputSlot(0));
+
+ // Add deswizzle layer
+ armnn::IConnectableLayer& deswizzleLayer = AddPermuteLayer(network, lastLayer.GetOutputSlot(0), ArmNNToNHWC);
+
+ return deswizzleLayer;
+}
+
+armnn::IConnectableLayer& SwizzleInDeswizzleOut(armnn::INetwork& network, LayerInputHandle& input,
+ armnn::IConnectableLayer& layer)
+{
+ return SwizzleInDeswizzleOut(network, input, layer, layer);
+}
+} // namespace
+
+namespace armnn_driver
+{
+
+class ConstTensorPin
+{
+public:
+ // Creates an invalid tensor pin (can be used to signal errors)
+ ConstTensorPin() {}
+
+ // @param tensorInfo TensorInfo associated with the tensor.
+ // @param valueStart Start address of tensor data. Belongs to one of the memory pools associated with
+ // the model being converted.
+ // @param numBytes Number of bytes for the tensor data.
+ ConstTensorPin(const armnn::TensorInfo& tensorInfo, const void* valueStart, uint32_t numBytes,
+ const armnn::PermutationVector& mappings)
+ {
+ boost::ignore_unused(numBytes);
+ assert(tensorInfo.GetNumBytes() == numBytes);
+
+ const bool needsSwizzling = (mappings.GetSize() > 0);
+ if (needsSwizzling)
+ {
+ m_SwizzledTensorData.resize(tensorInfo.GetNumBytes());
+ SwizzleAndroidNn4dTensorToArmNn(tensorInfo, valueStart, m_SwizzledTensorData.data(), mappings);
+
+ m_ConstTensor = armnn::ConstTensor(armnnUtils::Permuted(tensorInfo, mappings), m_SwizzledTensorData.data());
+ }
+ else
+ {
+ m_ConstTensor = armnn::ConstTensor(tensorInfo, valueStart);
+ }
+ }
+
+ ConstTensorPin(const ConstTensorPin& other) = delete;
+ ConstTensorPin(ConstTensorPin&& other) = default;
+
+ bool IsValid() const { return m_ConstTensor.GetMemoryArea() != nullptr; }
+ const armnn::ConstTensor& GetConstTensor() const { return m_ConstTensor; }
+
+private:
+ armnn::ConstTensor m_ConstTensor;
+ // Owned memory for swizzled tensor data, only required if the tensor needed
+ // swizzling. Otherwise, @ref m_ConstTensor will reference memory from one of
+ // the pools associated with the model being converted.
+ std::vector<uint8_t> m_SwizzledTensorData;
+};
+
+ModelToINetworkConverter::ModelToINetworkConverter(armnn::Compute compute, const Model& model,
+ const std::set<unsigned int>& forcedUnsupportedOperations)
+ : m_Compute(compute)
+ , m_Model(model)
+ , m_ForcedUnsupportedOperations(forcedUnsupportedOperations)
+ , m_Network(nullptr, nullptr)
+ , m_ConversionResult(ConversionResult::Success)
+{
+ try
+ {
+ Convert();
+ }
+ catch (armnn::Exception& e)
+ {
+ m_ConversionResult = ConversionResult::UnsupportedFeature;
+ ALOGE("%s: Unexpected exception: %s", __func__, e.what());
+ assert(false);
+ }
+}
+
+void ModelToINetworkConverter::Convert()
+{
+ ALOGV("ModelToINetworkConverter::Convert(): %s", GetModelSummary(m_Model).c_str());
+
+ // map the memory pool into shared pointers
+ m_MemPools.clear();
+ if (!setRunTimePoolInfosFromHidlMemories(&m_MemPools, m_Model.pools))
+ {
+ Fail("%s: Setting of run time pool infos from Hidl Memories has failed.", __func__);
+ m_ConversionResult = ConversionResult::ErrorMappingPools;
+ return;
+ }
+
+ uint32_t totalPoolSize = 0;
+ for (auto&& pool : m_Model.pools)
+ {
+ totalPoolSize += pool.size();
+ }
+
+ // Create armnn::INetwork
+ m_Network = armnn::INetwork::Create();
+
+ // add operations to it
+ // track which layer outputs each operand
+ m_OutputSlotForOperand = std::vector<armnn::IOutputSlot*>(m_Model.operands.size(), nullptr);
+
+ try
+ {
+ for (uint32_t i = 0; i < m_Model.inputIndexes.size(); i++)
+ {
+ // inputs in android nn are represented by operands
+ uint32_t inputIndex = m_Model.inputIndexes[i];
+ const Operand& operand = m_Model.operands[inputIndex];
+ const armnn::TensorInfo& tensor = GetTensorInfoForOperand(operand);
+ armnn::IConnectableLayer* layer = m_Network->AddInputLayer(i);
+
+ armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
+ outputSlot.SetTensorInfo(GetTensorInfoForOperand(operand));
+
+ // store for later layers
+ m_OutputSlotForOperand[inputIndex] = &outputSlot;
+ }
+ }
+ catch (UnsupportedOperand& e)
+ {
+ Fail("%s: Operand type %s not supported in ArmnnDriver", __func__, toString(e.m_type).c_str());
+ m_ConversionResult = ConversionResult::UnsupportedFeature;
+ }
+ catch (const armnn::InvalidArgumentException& e)
+ {
+ Fail("%s: Failed to convert input operand to TensorShape: %s", __func__, e.what());
+ m_ConversionResult = ConversionResult::UnsupportedFeature;
+ }
+
+ for (uint32_t operationIdx = 0; operationIdx < m_Model.operations.size(); operationIdx++)
+ {
+ const auto& operation = m_Model.operations[operationIdx];
+
+ bool ok = true;
+ if (m_ForcedUnsupportedOperations.find(operationIdx) != m_ForcedUnsupportedOperations.end())
+ {
+ Fail("%s: Operation at index %i has been forced to be unsupported.", __func__, operationIdx);
+ ok = false;
+ }
+
+ if (ok)
+ {
+ try
+ {
+ ok = ConvertOperation(operation);
+ }
+ catch (UnsupportedOperand& e)
+ {
+ Fail("%s: Operand type %s not supported in ArmnnDriver", __func__, toString(e.m_type).c_str());
+ ok = false;
+ }
+ catch (const armnn::InvalidArgumentException& e)
+ {
+ Fail("%s: Failed to convert operation in %s", __func__, e.what());
+ ok = false;
+ }
+ }
+
+ // Store whether this operation was successfully converted.
+ m_OperationSupported.emplace(operationIdx, ok);
+
+ // Any single operation failing will fail the entire conversion.
+ // We still need to continue and check the other ones.
+ if (!ok)
+ {
+ m_ConversionResult = ConversionResult::UnsupportedFeature;
+ }
+ }
+ try
+ {
+ if (m_ConversionResult == ConversionResult::Success)
+ {
+ for (uint32_t i = 0; i < m_Model.outputIndexes.size(); i++)
+ {
+ // outputs in android nn are represented by operands
+ uint32_t outputIndex = m_Model.outputIndexes[i];
+ const Operand& operand = m_Model.operands[outputIndex];
+ const armnn::TensorInfo& tensor = GetTensorInfoForOperand(operand);
+ armnn::IConnectableLayer* layer = m_Network->AddOutputLayer(i);
+
+ assert(m_OutputSlotForOperand[outputIndex]);
+ m_OutputSlotForOperand[outputIndex]->Connect(layer->GetInputSlot(0));
+ }
+ }
+ }
+ catch (const armnn::InvalidArgumentException& e)
+ {
+ Fail("%s: Failed to convert output operand to TensorShape: %s", __func__, e.what());
+ m_ConversionResult = ConversionResult::UnsupportedFeature;
+ }
+}
+
+bool ModelToINetworkConverter::ConvertOperation(const Operation& operation)
+{
+ switch (operation.type)
+ {
+ case OperationType::ADD: return ConvertAdd(operation);
+ case OperationType::AVERAGE_POOL_2D: return ConvertAveragePool2d(operation);
+ case OperationType::CONCATENATION: return ConvertConcatenation(operation);
+ case OperationType::CONV_2D: return ConvertConv2d(operation);
+ case OperationType::DEPTHWISE_CONV_2D: return ConvertDepthwiseConv2d(operation);
+ case OperationType::FLOOR: return ConvertFloor(operation);
+ case OperationType::FULLY_CONNECTED: return ConvertFullyConnected(operation);
+ case OperationType::LOCAL_RESPONSE_NORMALIZATION: return ConvertLocalResponseNormalization(operation);
+ case OperationType::LOGISTIC: return ConvertLogistic(operation);
+ case OperationType::L2_NORMALIZATION: return ConvertL2Normalization(operation);
+ case OperationType::L2_POOL_2D: return ConvertL2Pool2d(operation);
+ case OperationType::MAX_POOL_2D: return ConvertMaxPool2d(operation);
+ case OperationType::MUL: return ConvertMul(operation);
+ case OperationType::RELU: return ConvertReLu(operation);
+ case OperationType::RELU1: return ConvertReLu1(operation);
+ case OperationType::RELU6: return ConvertReLu6(operation);
+ case OperationType::SOFTMAX: return ConvertSoftmax(operation);
+ case OperationType::TANH: return ConvertTanH(operation);
+ case OperationType::RESHAPE: return ConvertReshape(operation);
+ case OperationType::RESIZE_BILINEAR: return ConvertResizeBilinear(operation);
+ default: return Fail("%s: Operation type %s not supported in ArmnnDriver",
+ __func__, toString(operation.type).c_str());
+ }
+}
+
+class LayerInputHandle
+{
+public:
+ LayerInputHandle()
+ : m_OutputSlot(nullptr)
+ , m_Valid(false)
+ {}
+
+ LayerInputHandle(bool valid, armnn::IOutputSlot* outputSlot, armnn::TensorInfo tensorInfo)
+ : m_OutputSlot(outputSlot)
+ , m_Valid(valid)
+ , m_TensorInfo(tensorInfo)
+ {}
+
+ bool IsValid() const { return m_Valid; }
+ void Connect(armnn::IInputSlot& inputSlot)
+ {
+ assert(IsValid());
+
+ if (m_OutputSlot)
+ {
+ m_OutputSlot->Connect(inputSlot);
+ }
+ }
+ const armnn::TensorInfo& GetTensorInfo() const { return m_TensorInfo; }
+
+private:
+ armnn::IOutputSlot* m_OutputSlot;
+ bool m_Valid;
+ armnn::TensorInfo m_TensorInfo;
+};
+
+bool ModelToINetworkConverter::ConvertAdd(const Operation& operation)
+{
+ LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0);
+ LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1);
+
+ if (!input0.IsValid() || !input1.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ ActivationFn activationFunction;
+ if (!GetInputActivationFunction(operation, 2, activationFunction))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const Operand* outputOperand = GetOutputOperand(operation, 0);
+ if (!outputOperand)
+ {
+ return false;
+ }
+
+ const armnn::TensorInfo outInfo = GetTensorInfoForOperand(*outputOperand);
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsAdditionSupported,
+ m_Compute,
+ input0.GetTensorInfo(),
+ input1.GetTensorInfo(),
+ outInfo))
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* const startLayer = m_Network->AddAdditionLayer();
+ armnn::IConnectableLayer* const endLayer = ProcessActivation(outInfo, activationFunction, startLayer);
+
+ const armnn::TensorInfo& inputTensorInfo0 = input0.GetTensorInfo();
+ const armnn::TensorInfo& inputTensorInfo1 = input1.GetTensorInfo();
+
+ if (endLayer != nullptr)
+ {
+ // If the number of dimensions do not match then we need to add degenerate dimensions
+ // to the "smaller" tensor using a reshape:
+ // Small Big
+ // | |
+ // Reshape |
+ // \ /
+ // Add
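+        // For example, adding a 1x2x2x2 tensor and a 2-element tensor reshapes the smaller operand
+        // to 1x1x1x2 so that both inputs to the addition have four dimensions.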
+ if (inputTensorInfo0.GetNumDimensions() != inputTensorInfo1.GetNumDimensions())
+ {
+ bool input0IsBigger = inputTensorInfo0.GetNumDimensions() > inputTensorInfo1.GetNumDimensions();
+
+ LayerInputHandle& smallTensorHandle = input0IsBigger ? input1 : input0;
+ const armnn::TensorInfo& smallTensorDims = smallTensorHandle.GetTensorInfo();
+
+ LayerInputHandle& bigTensorHandle = input0IsBigger ? input0 : input1;
+ const armnn::TensorInfo& bigTensorDims = bigTensorHandle.GetTensorInfo();
+
+ std::vector<unsigned int> reshapedDims(bigTensorDims.GetNumDimensions(), 1);
+ unsigned int sizeDifference = bigTensorDims.GetNumDimensions() - smallTensorDims.GetNumDimensions();
+ for (unsigned i = sizeDifference; i < bigTensorDims.GetNumDimensions(); ++i)
+ {
+ reshapedDims[i] = smallTensorDims.GetShape()[i-sizeDifference];
+ }
+ armnn::TensorInfo reshapedInfo = smallTensorDims;
+ reshapedInfo.SetShape(armnn::TensorShape{ static_cast<unsigned int>(reshapedDims.size()),
+ reshapedDims.data() });
+
+ armnn::ReshapeDescriptor reshapeDesc;
+ reshapeDesc.m_TargetShape = reshapedInfo.GetShape();
+ armnn::IConnectableLayer* const reshapeLayer = m_Network->AddReshapeLayer(reshapeDesc);
+ smallTensorHandle.Connect(reshapeLayer->GetInputSlot(0));
+ reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo);
+
+ // Connect the outputs from new reshape and original input layer
+ reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0));
+ bigTensorHandle.Connect(startLayer->GetInputSlot(1));
+ }
+ else
+ {
+ input0.Connect(startLayer->GetInputSlot(0));
+ input1.Connect(startLayer->GetInputSlot(1));
+ }
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer);
+ }
+ else
+ {
+ return Fail("%s: ProcessActivation failed", __func__);
+ }
+}
+
+bool ModelToINetworkConverter::ConvertAveragePool2d(const Operation& operation)
+{
+ return ConvertPooling2d(operation, __func__, armnn::PoolingAlgorithm::Average);
+}
+
+bool ModelToINetworkConverter::ConvertConcatenation(const Operation& operation)
+{
+ // The first N (0..N-1) inputs are tensors. The Nth input is the concatenation axis.
+ if (operation.inputs.size() <= 1)
+ {
+ return Fail("%s: Operation has insufficient arguments", __func__);
+ }
+
+ // Get inputs and outputs
+ const std::size_t numInputTensors = operation.inputs.size() - 1;
+
+ std::vector<LayerInputHandle> inputHandles;
+ std::vector<armnn::TensorShape> inputShapes;
+
+ inputHandles.reserve(numInputTensors);
+ inputShapes.reserve(numInputTensors);
+
+ for (uint32_t i = 0; i < numInputTensors; ++i)
+ {
+ const Operand* const operand = GetInputOperand(operation, i);
+ if (!operand)
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ inputShapes.emplace_back(GetTensorShapeForOperand(*operand));
+ inputHandles.emplace_back(ConvertToLayerInputHandle(operation, i));
+ if (!inputHandles.back().IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+ }
+
+ assert(inputShapes.size() == inputHandles.size());
+
+ uint32_t concatDim;
+ if (!GetInputScalar(operation, numInputTensors, OperandType::INT32, concatDim))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const Operand* const outputOperand = GetOutputOperand(operation, 0);
+ if (!outputOperand)
+ {
+ return Fail("%s: Operation has no outputs", __func__);
+ }
+ const armnn::TensorShape outputShape = GetTensorShapeForOperand(*outputOperand);
+
+ // Create an armnn merger layer descriptor - this will also perform validation on the input shapes
+ armnn::OriginsDescriptor mergerDescriptor;
+ try
+ {
+ mergerDescriptor = armnn::CreateMergerDescriptorForConcatenation(inputShapes.begin(), inputShapes.end(),
+ concatDim);
+ }
+ catch (const armnn::Exception& error)
+ {
+ return Fail("%s: Error preparing merger descriptor. %s", __func__, error.what());
+ }
+
+ // Validate the output shape is correct given the input shapes (which have just been validated)
+ unsigned int numDimensions = inputShapes[0].GetNumDimensions();
+ if (outputShape.GetNumDimensions() != numDimensions)
+ {
+ return Fail("%s: Output shape has wrong number of dimensions", __func__);
+ }
+
+ unsigned int outputSizeAlongConcatenatedDimension = 0;
+ for (unsigned int i = 0; i < inputShapes.size(); i++)
+ {
+ outputSizeAlongConcatenatedDimension += inputShapes[i][concatDim];
+ }
+
+ for (unsigned int i = 0; i < numDimensions; ++i)
+ {
+ if (i == concatDim)
+ {
+ if (outputShape[i] != outputSizeAlongConcatenatedDimension)
+ {
+ return Fail("%s: Invalid output shape", __func__);
+ }
+ }
+ else
+ {
+ if (outputShape[i] != inputShapes[0][i])
+ {
+ return Fail("%s: Invalid output shape", __func__);
+ }
+ }
+ }
+
+ std::vector<const armnn::TensorInfo*> inputTensorInfos;
+ std::transform(inputHandles.begin(), inputHandles.end(), std::back_inserter(inputTensorInfos),
+ [](const LayerInputHandle& h) -> const armnn::TensorInfo*{ return &h.GetTensorInfo(); });
+ if (!IsLayerSupported(__func__,
+ armnn::IsMergerSupported,
+ m_Compute,
+ inputTensorInfos,
+ mergerDescriptor))
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* layer = m_Network->AddMergerLayer(mergerDescriptor);
+ assert(layer != nullptr);
+
+ // Connect inputs to the layer
+ const int numInputSlots = layer->GetNumInputSlots();
+ assert(static_cast<std::size_t>(numInputSlots) == inputHandles.size());
+ for (int i = 0; i < numInputSlots; ++i)
+ {
+ inputHandles[static_cast<unsigned int>(i)].Connect(layer->GetInputSlot(i));
+ }
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, *layer);
+}
+
+bool ModelToINetworkConverter::ConvertConv2d(const Operation& operation)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const Operand* output = GetOutputOperand(operation, 0);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+ const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ const armnn::TensorInfo swizzledInputInfo = armnnUtils::Permuted(inputInfo, NHWCToArmNN);
+ const armnn::TensorInfo swizzledOutputInfo = armnnUtils::Permuted(outputInfo, NHWCToArmNN);
+
+ // ArmNN does not currently support non-fixed weights or bias
+ const ConstTensorPin weightsPin = ConvertOperationInputToConstTensorPin(operation, 1, NHWCToArmNN);
+ const ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin(operation, 2);
+
+ if (!weightsPin.IsValid() || !biasPin.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ armnn::ConstTensor weights = weightsPin.GetConstTensor();
+ armnn::ConstTensor bias = biasPin.GetConstTensor();
+ SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), swizzledInputInfo);
+
+ armnn::Convolution2dDescriptor desc;
+ ActivationFn activation;
+
+ if (operation.inputs.size() == 10)
+ {
+ if (!GetInputScalar(operation, 3, OperandType::INT32, desc.m_PadLeft) ||
+ !GetInputScalar(operation, 4, OperandType::INT32, desc.m_PadRight) ||
+ !GetInputScalar(operation, 5, OperandType::INT32, desc.m_PadTop) ||
+ !GetInputScalar(operation, 6, OperandType::INT32, desc.m_PadBottom) ||
+ !GetInputScalar(operation, 7, OperandType::INT32, desc.m_StrideX) ||
+ !GetInputScalar(operation, 8, OperandType::INT32, desc.m_StrideY) ||
+ !GetInputActivationFunction(operation, 9, activation))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+ }
+ else if (operation.inputs.size() == 7)
+ {
+ android::nn::PaddingScheme paddingScheme;
+
+ if (!GetInputPaddingScheme(operation, 3, paddingScheme) ||
+ !GetInputScalar(operation, 4, OperandType::INT32, desc.m_StrideX) ||
+ !GetInputScalar(operation, 5, OperandType::INT32, desc.m_StrideY) ||
+ !GetInputActivationFunction(operation, 6, activation))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const uint32_t kernelX = weights.GetShape()[3];
+ const uint32_t kernelY = weights.GetShape()[2];
+ const uint32_t inputX = swizzledInputInfo.GetShape()[3];
+ const uint32_t inputY = swizzledInputInfo.GetShape()[2];
+
+ CalcPadding(inputX, kernelX, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, paddingScheme);
+ CalcPadding(inputY, kernelY, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, paddingScheme);
+ }
+ else
+ {
+ return Fail("%s: Unsupported number of operation inputs", __func__);
+ }
+
+ desc.m_BiasEnabled = true;
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsConvolution2dSupported,
+ m_Compute,
+ swizzledInputInfo,
+ desc,
+ weights.GetInfo()))
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* startLayer = m_Network->AddConvolution2dLayer(desc, weights, bias);
+ armnn::IConnectableLayer* endLayer = ProcessActivation(swizzledOutputInfo, activation, startLayer);
+
+ if (endLayer != nullptr)
+ {
+ armnn::IConnectableLayer& outSwizzleLayer = SwizzleInDeswizzleOut(*m_Network, input, *startLayer, *endLayer);
+ return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer);
+ }
+ else
+ {
+ return Fail("%s: ProcessActivation failed", __func__);
+ }
+}
+
+bool ModelToINetworkConverter::ConvertDepthwiseConv2d(const Operation& operation)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const Operand* output = GetOutputOperand(operation, 0);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+ const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ const armnn::TensorInfo swizzledInputInfo = armnnUtils::Permuted(inputInfo, NHWCToArmNN);
+ const armnn::TensorInfo swizzledOutputInfo = armnnUtils::Permuted(outputInfo, NHWCToArmNN);
+
+ // ArmNN does not currently support non-fixed weights or bias
+
+ // Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ]
+ // but in ArmNN it needs to be [ M, I, H, W ]
+ const Operand* weightsOperand = GetInputOperand(operation, 1);
+
+ if (weightsOperand == nullptr)
+ {
+ return Fail("%s: Operand is invalid", __func__);
+ }
+
+ // Reinterpret weight data as [ H, W, I, M ]
+ armnn::TensorShape weightsShape({ weightsOperand->dimensions[1], weightsOperand->dimensions[2],
+ inputInfo.GetShape()[3],
+ weightsOperand->dimensions[3] / inputInfo.GetShape()[3] });
+
+ // Swizzle weight data [ H, W, I, M ] -> [ M, I, H, W ]
+ const armnn::PermutationVector HWIMToMIHW = { 2U, 3U, 1U, 0U };
+ ConstTensorPin weightsPin = ConvertOperationInputToConstTensorPin(operation, 1, HWIMToMIHW, &weightsShape);
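+    // Illustrative example: AndroidNN weights of shape [ 1, 3, 3, 32 ] over a 16-channel input are
+    // reinterpreted as [ 3, 3, 16, 2 ] (depth multiplier M = 32 / 16) and swizzled to [ 2, 16, 3, 3 ].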
+
+ // Bias is a 1D tensor
+ ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin(operation, 2);
+
+ if (!weightsPin.IsValid() || !biasPin.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ armnn::ConstTensor weights = weightsPin.GetConstTensor();
+ armnn::ConstTensor bias = biasPin.GetConstTensor();
+ SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), swizzledInputInfo);
+
+ armnn::DepthwiseConvolution2dDescriptor desc;
+ ActivationFn activation;
+
+ if (operation.inputs.size() == 11)
+ {
+ if (!GetInputScalar(operation, 3, OperandType::INT32, desc.m_PadLeft) ||
+ !GetInputScalar(operation, 4, OperandType::INT32, desc.m_PadRight) ||
+ !GetInputScalar(operation, 5, OperandType::INT32, desc.m_PadTop) ||
+ !GetInputScalar(operation, 6, OperandType::INT32, desc.m_PadBottom) ||
+ !GetInputScalar(operation, 7, OperandType::INT32, desc.m_StrideX) ||
+ !GetInputScalar(operation, 8, OperandType::INT32, desc.m_StrideY) ||
+ !GetInputActivationFunction(operation, 10, activation))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+ }
+ else if (operation.inputs.size() == 8)
+ {
+ android::nn::PaddingScheme paddingScheme;
+
+ if (!GetInputPaddingScheme(operation, 3, paddingScheme) ||
+ !GetInputScalar(operation, 4, OperandType::INT32, desc.m_StrideX) ||
+ !GetInputScalar(operation, 5, OperandType::INT32, desc.m_StrideY) ||
+ !GetInputActivationFunction(operation, 7, activation))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const uint32_t kernelX = weights.GetShape()[3];
+ const uint32_t kernelY = weights.GetShape()[2];
+ const uint32_t inputX = swizzledInputInfo.GetShape()[3];
+ const uint32_t inputY = swizzledInputInfo.GetShape()[2];
+
+ CalcPadding(inputX, kernelX, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, paddingScheme);
+ CalcPadding(inputY, kernelY, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, paddingScheme);
+ }
+ else
+ {
+ return Fail("%s: Unsupported number of operation inputs", __func__);
+ }
+
+ desc.m_BiasEnabled = true;
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsDepthwiseConvolutionSupported,
+ m_Compute,
+ swizzledInputInfo,
+ desc,
+ weights.GetInfo()))
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* startLayer = m_Network->AddDepthwiseConvolution2dLayer(desc, weights, bias);
+ armnn::IConnectableLayer* endLayer = ProcessActivation(swizzledOutputInfo, activation, startLayer);
+
+ if (endLayer != nullptr)
+ {
+ armnn::IConnectableLayer& outSwizzleLayer = SwizzleInDeswizzleOut(*m_Network, input, *startLayer, *endLayer);
+ return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer);
+ }
+ else
+ {
+ return Fail("%s: ProcessActivation failed", __func__);
+ }
+}
+
+bool ModelToINetworkConverter::ConvertFloor(const Operation& operation)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const Operand* const outputOperand = GetOutputOperand(operation, 0);
+ if (!outputOperand)
+ {
+ return Fail("%s: Operation has invalid outputs", __func__);
+ }
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsFloorSupported,
+ m_Compute,
+ input.GetTensorInfo(),
+ GetTensorInfoForOperand(*outputOperand)))
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* layer = m_Network->AddFloorLayer();
+ assert(layer != nullptr);
+ input.Connect(layer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, *layer);
+}
+
+bool ModelToINetworkConverter::ConvertFullyConnected(const Operation& operation)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const Operand* output = GetOutputOperand(operation, 0);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+ const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ armnn::TensorInfo reshapedInfo = inputInfo;
+
+ if (inputInfo.GetNumDimensions() > 2U)
+ {
+ unsigned int dim1 = inputInfo.GetShape()[1];
+ for (unsigned int i = 2U; i < inputInfo.GetNumDimensions(); ++i)
+ {
+ dim1 *= inputInfo.GetShape()[i];
+ }
+ reshapedInfo.SetShape(armnn::TensorShape({inputInfo.GetShape()[0], dim1}));
+ }
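+    // For example, a [ 1, 4, 4, 8 ] input is flattened to [ 1, 128 ] before the fully connected layer.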
+
+ // ArmNN does not currently support non-fixed weights or bias
+ ConstTensorPin weightsPin = ConvertOperationInputToConstTensorPin(operation, 1); // 2D
+ ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin(operation, 2); // 1D
+
+ if (!weightsPin.IsValid() || !biasPin.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+    // Ensure the bias quantization scale is within 1% of the product of the input and weight scales (small float differences can exist)
+ armnn::ConstTensor weights = weightsPin.GetConstTensor();
+ armnn::ConstTensor bias = biasPin.GetConstTensor();
+ SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), reshapedInfo);
+
+ ActivationFn activationFunction;
+ if (!GetInputActivationFunction(operation, 3, activationFunction))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ armnn::FullyConnectedDescriptor desc;
+ desc.m_TransposeWeightMatrix = true;
+ desc.m_BiasEnabled = true;
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsFullyConnectedSupported,
+ m_Compute,
+ reshapedInfo,
+ desc))
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* startLayer = m_Network->AddFullyConnectedLayer(desc, weights, bias);
+ armnn::IConnectableLayer* endLayer = ProcessActivation(outputInfo, activationFunction, startLayer);
+
+ if (endLayer != nullptr)
+ {
+ if (inputInfo.GetNumDimensions() > 2U)
+ {
+ armnn::ReshapeDescriptor reshapeDescriptor;
+ reshapeDescriptor.m_TargetShape = reshapedInfo.GetShape();
+
+ armnn::IConnectableLayer* reshapeLayer = m_Network->AddReshapeLayer(reshapeDescriptor);
+ assert(reshapeLayer != nullptr);
+ input.Connect(reshapeLayer->GetInputSlot(0));
+ reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo);
+ reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0));
+ }
+ else
+ {
+ input.Connect(startLayer->GetInputSlot(0));
+ }
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer);
+ }
+ else
+ {
+ return Fail("%s: ProcessActivation failed", __func__);
+ }
+}
+
+bool ModelToINetworkConverter::ConvertLocalResponseNormalization(const Operation& operation)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const Operand* output = GetOutputOperand(operation, 0);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+ const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ const armnn::TensorInfo swizzledInputInfo = armnnUtils::Permuted(inputInfo, NHWCToArmNN);
+ const armnn::TensorInfo swizzledOutputInfo = armnnUtils::Permuted(outputInfo, NHWCToArmNN);
+
+ armnn::NormalizationDescriptor descriptor;
+
+ descriptor.m_NormChannelType = armnn::NormalizationAlgorithmChannel::Across;
+ descriptor.m_NormMethodType = armnn::NormalizationAlgorithmMethod::LocalBrightness;
+
+ if (!input.IsValid() ||
+ !GetInputScalar(operation, 1, OperandType::INT32, descriptor.m_NormSize) ||
+ !GetInputFloat32(operation, 2, descriptor.m_K) ||
+ !GetInputFloat32(operation, 3, descriptor.m_Alpha) ||
+ !GetInputFloat32(operation, 4, descriptor.m_Beta))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ // ArmNN expects normSize to be the full size of the normalization
+ // window rather than the radius as in AndroidNN.
+ descriptor.m_NormSize = 1 + (2 * descriptor.m_NormSize);
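+    // For example, an AndroidNN radius of 2 becomes an ArmNN window size of 1 + (2 * 2) = 5.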
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsNormalizationSupported,
+ m_Compute,
+ swizzledInputInfo,
+ swizzledOutputInfo,
+ descriptor))
+ {
+ return false;
+ }
+
+
+ armnn::IConnectableLayer* layer = m_Network->AddNormalizationLayer(descriptor);
+ assert(layer != nullptr);
+ layer->GetOutputSlot(0).SetTensorInfo(swizzledOutputInfo);
+
+ armnn::IConnectableLayer& outSwizzleLayer = SwizzleInDeswizzleOut(*m_Network, input, *layer);
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer);
+}
+
+bool ModelToINetworkConverter::ConvertLogistic(const Operation& operation)
+{
+ armnn::ActivationDescriptor desc;
+    desc.m_Function = armnn::ActivationFunction::Sigmoid;
+
+ return ConvertToActivation(operation, __func__, desc);
+}
+
+bool ModelToINetworkConverter::ConvertL2Normalization(const Operation& operation)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const Operand* output = GetOutputOperand(operation, 0);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+ const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ const armnn::TensorInfo swizzledInputInfo = armnnUtils::Permuted(inputInfo, NHWCToArmNN);
+ const armnn::TensorInfo swizzledOutputInfo = armnnUtils::Permuted(outputInfo, NHWCToArmNN);
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsL2NormalizationSupported,
+ m_Compute,
+ swizzledInputInfo))
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* layer = m_Network->AddL2NormalizationLayer();
+ assert(layer != nullptr);
+ layer->GetOutputSlot(0).SetTensorInfo(swizzledOutputInfo);
+
+ armnn::IConnectableLayer& outSwizzleLayer = SwizzleInDeswizzleOut(*m_Network, input, *layer);
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer);
+}
+
+bool ModelToINetworkConverter::ConvertL2Pool2d(const Operation& operation)
+{
+ return ConvertPooling2d(operation, __func__, armnn::PoolingAlgorithm::L2);
+}
+
+bool ModelToINetworkConverter::ConvertMaxPool2d(const Operation& operation)
+{
+ return ConvertPooling2d(operation, __func__, armnn::PoolingAlgorithm::Max);
+}
+
+bool ModelToINetworkConverter::ConvertMul(const Operation& operation)
+{
+ LayerInputHandle input0 = ConvertToLayerInputHandle(operation, 0);
+ LayerInputHandle input1 = ConvertToLayerInputHandle(operation, 1);
+
+ if (!input0.IsValid() || !input1.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ ActivationFn activationFunction;
+ if (!GetInputActivationFunction(operation, 2, activationFunction))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ if (!ValidateBroadcast(m_Model, operation, 2u))
+ {
+ return Fail("%s is invalid due to broadcasting", __func__);
+ }
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsMultiplicationSupported,
+ m_Compute,
+ input0.GetTensorInfo(),
+ input1.GetTensorInfo()))
+ {
+ return false;
+ }
+
+ const Operand* outputOperand = GetOutputOperand(operation, 0);
+
+ if (outputOperand == nullptr)
+ {
+ return false;
+ }
+
+ const armnn::TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand);
+
+ armnn::IConnectableLayer* const startLayer = m_Network->AddMultiplicationLayer();
+ armnn::IConnectableLayer* const endLayer = ProcessActivation(outInfo, activationFunction, startLayer);
+
+ if (endLayer != nullptr)
+ {
+ input0.Connect(startLayer->GetInputSlot(0));
+ input1.Connect(startLayer->GetInputSlot(1));
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, *endLayer);
+ }
+ else
+ {
+ return Fail("%s: ProcessActivation failed", __func__);
+ }
+}
+
+bool ModelToINetworkConverter::ConvertReLu(const Operation& operation)
+{
+ armnn::ActivationDescriptor desc;
+ desc.m_Function = armnn::ActivationFunction::ReLu;
+
+ return ConvertToActivation(operation, __func__, desc);
+}
+
+bool ModelToINetworkConverter::ConvertReLu1(const Operation& operation)
+{
+ armnn::ActivationDescriptor desc;
+ desc.m_Function = armnn::ActivationFunction::BoundedReLu;
+ desc.m_A = 1.0f;
+ desc.m_B = -1.0f;
+
+ return ConvertToActivation(operation, __func__, desc);
+}
+
+bool ModelToINetworkConverter::ConvertReLu6(const Operation& operation)
+{
+ armnn::ActivationDescriptor desc;
+ desc.m_Function = armnn::ActivationFunction::BoundedReLu;
+ desc.m_A = 6.0f;
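+    // desc.m_B keeps its default of 0.0f, so the bounded ReLU clamps values to the [0, 6] range expected by RELU6.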
+
+ return ConvertToActivation(operation, __func__, desc);
+}
+
+bool ModelToINetworkConverter::ConvertSoftmax(const Operation& operation)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ armnn::SoftmaxDescriptor desc;
+ if (!GetInputFloat32(operation, 1, desc.m_Beta))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsSoftmaxSupported,
+ m_Compute,
+ input.GetTensorInfo(),
+ desc))
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* layer = m_Network->AddSoftmaxLayer(desc);
+ assert(layer != nullptr);
+ input.Connect(layer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, *layer);
+}
+
+bool ModelToINetworkConverter::ConvertTanH(const Operation& operation)
+{
+ armnn::ActivationDescriptor desc;
+ desc.m_Function = armnn::ActivationFunction::TanH;
+ desc.m_A = 1.0f; // android nn does not support tanH parameters
+ desc.m_B = 1.0f; // set to 1.0f for unity scaling
+
+ return ConvertToActivation(operation, __func__, desc);
+}
+
+bool ModelToINetworkConverter::ConvertReshape(const Operation& operation)
+{
+ const Operand* inputOperand = GetInputOperand(operation, 0);
+ const Operand* requestedShapeOperand = GetInputOperand(operation, 1);
+ const Operand* outputOperand = GetOutputOperand(operation, 0);
+
+ if (inputOperand == nullptr
+ || requestedShapeOperand == nullptr
+ || outputOperand == nullptr)
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+
+ if (requestedShapeOperand->dimensions.size() != 1)
+ {
+ return Fail("%s: Input 1 expected to be one-dimensional (found %i dimensions)",
+ __func__, requestedShapeOperand->dimensions.size());
+ }
+
+ std::vector<int32_t> targetDimensions;
+ if (!GetTensorInt32Values(*requestedShapeOperand, targetDimensions))
+ {
+ return Fail("%s: Could not read values of input 1", __func__);
+ }
+
+ const Shape inputOperandShape = GetOperandShape(*inputOperand);
+
+ Shape requestedShape;
+ // targetDimensions may contain special values (e.g. -1). reshapePrepare() is an AndroidNN provided utility
+ // function that resolves these values into a fully specified tensor shape.
+ if (!reshapePrepare(inputOperandShape, targetDimensions.data(), targetDimensions.size(), &requestedShape))
+ {
+ return Fail("%s: Failed to resolve the requested shape", __func__);
+ }
+
+ const Shape outputOperandShape = GetOperandShape(*outputOperand);
+ if (!SameShape(requestedShape, outputOperandShape))
+ {
+ return Fail("%s: Shape of output operand does not match resolved requested shape", __func__);
+ }
+
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Could not read input 0", __func__);
+ }
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsReshapeSupported,
+ m_Compute,
+ input.GetTensorInfo()))
+ {
+ return false;
+ }
+
+
+ armnn::ReshapeDescriptor reshapeDescriptor;
+ reshapeDescriptor.m_TargetShape = armnn::TensorShape(requestedShape.dimensions.size(),
+ requestedShape.dimensions.data());
+
+ armnn::IConnectableLayer* layer = m_Network->AddReshapeLayer(reshapeDescriptor);
+ assert(layer != nullptr);
+ input.Connect(layer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, *layer);
+}
+
+bool ModelToINetworkConverter::ConvertResizeBilinear(const Operation& operation)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Could not read input 0", __func__);
+ }
+
+ const Operand* output = GetOutputOperand(operation, 0);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+ const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ const armnn::TensorInfo swizzledInputInfo = armnnUtils::Permuted(inputInfo, NHWCToArmNN);
+ const armnn::TensorInfo swizzledOutputInfo = armnnUtils::Permuted(outputInfo, NHWCToArmNN);
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsResizeBilinearSupported,
+ m_Compute,
+ swizzledInputInfo))
+ {
+ return false;
+ }
+
+ armnn::ResizeBilinearDescriptor desc;
+
+ if ( !GetInputScalar(operation, 1, OperandType::INT32, desc.m_TargetHeight)
+ || !GetInputScalar(operation, 2, OperandType::INT32, desc.m_TargetWidth))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ armnn::IConnectableLayer* layer = m_Network->AddResizeBilinearLayer(desc);
+ assert(layer != nullptr);
+ layer->GetOutputSlot(0).SetTensorInfo(swizzledOutputInfo);
+
+ armnn::IConnectableLayer& outSwizzleLayer = SwizzleInDeswizzleOut(*m_Network, input, *layer);
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer);
+
+}
+
+bool ModelToINetworkConverter::ConvertToActivation(const Operation& operation,
+ const char* operationName,
+ const armnn::ActivationDescriptor& activationDesc)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Input 0 is invalid", operationName);
+ }
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsActivationSupported,
+ m_Compute,
+ input.GetTensorInfo(),
+ activationDesc))
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* layer = m_Network->AddActivationLayer(activationDesc);
+ assert(layer != nullptr);
+ input.Connect(layer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot(operation, 0, *layer);
+}
+
+bool ModelToINetworkConverter::ConvertPooling2d(const Operation& operation,
+ const char* operationName,
+ armnn::PoolingAlgorithm poolType)
+{
+ LayerInputHandle input = ConvertToLayerInputHandle(operation, 0);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Could not read input 0", operationName);
+ }
+
+ const Operand* output = GetOutputOperand(operation, 0);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+ const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ const armnn::TensorInfo swizzledInputInfo = armnnUtils::Permuted(inputInfo, NHWCToArmNN);
+ const armnn::TensorInfo swizzledOutputInfo = armnnUtils::Permuted(outputInfo, NHWCToArmNN);
+
+ armnn::Pooling2dDescriptor desc;
+ desc.m_PoolType = poolType;
+ desc.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
+
+ ActivationFn activation;
+
+ if (operation.inputs.size() == 7)
+ {
+ // one input, 6 parameters (padding, stridex, stridey, width, height, activation type)
+ android::nn::PaddingScheme scheme;
+
+ if ( !GetInputPaddingScheme(operation, 1, scheme)
+ || !GetInputScalar(operation, 2, OperandType::INT32, desc.m_StrideX)
+ || !GetInputScalar(operation, 3, OperandType::INT32, desc.m_StrideY)
+ || !GetInputScalar(operation, 4, OperandType::INT32, desc.m_PoolWidth)
+ || !GetInputScalar(operation, 5, OperandType::INT32, desc.m_PoolHeight)
+ || !GetInputActivationFunction(operation, 6, activation))
+ {
+ return Fail("%s: Operation has invalid inputs", operationName);
+ }
+
+ const unsigned int inputWidth = swizzledInputInfo.GetShape()[3];
+ const unsigned int inputHeight = swizzledInputInfo.GetShape()[2];
+
+ CalcPadding(inputWidth, desc.m_PoolWidth, desc.m_StrideX, desc.m_PadLeft, desc.m_PadRight, scheme);
+ CalcPadding(inputHeight, desc.m_PoolHeight, desc.m_StrideY, desc.m_PadTop, desc.m_PadBottom, scheme);
+ }
+ else
+ {
+ // one input, 9 parameters (padding l r t b, stridex, stridey, width, height, activation type)
+ if ( !GetInputScalar(operation, 1, OperandType::INT32, desc.m_PadLeft)
+ || !GetInputScalar(operation, 2, OperandType::INT32, desc.m_PadRight)
+ || !GetInputScalar(operation, 3, OperandType::INT32, desc.m_PadTop)
+ || !GetInputScalar(operation, 4, OperandType::INT32, desc.m_PadBottom)
+ || !GetInputScalar(operation, 5, OperandType::INT32, desc.m_StrideX)
+ || !GetInputScalar(operation, 6, OperandType::INT32, desc.m_StrideY)
+ || !GetInputScalar(operation, 7, OperandType::INT32, desc.m_PoolWidth)
+ || !GetInputScalar(operation, 8, OperandType::INT32, desc.m_PoolHeight)
+ || !GetInputActivationFunction(operation, 9, activation))
+ {
+ return Fail("%s: Operation has invalid inputs", operationName);
+ }
+ }
+
+ // ArmNN does not accept a pooling size of 1, but the driver is still expected to handle it.
+ // Such a pooling is mapped to a trivial (single-view) splitter instead.
+ armnn::IConnectableLayer* startLayer = nullptr;
+ if (desc.m_PoolWidth != 1 || desc.m_PoolHeight != 1)
+ {
+ if (!IsLayerSupported(__func__,
+ armnn::IsPooling2dSupported,
+ m_Compute,
+ swizzledInputInfo,
+ swizzledOutputInfo,
+ desc))
+ {
+ return false;
+ }
+
+ startLayer = m_Network->AddPooling2dLayer(desc);
+ }
+ else
+ {
+ const unsigned int numDims = swizzledOutputInfo.GetNumDimensions();
+
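+ // A single view spanning the whole tensor, so the splitter simply forwards its input unchanged.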
+ armnn::ViewsDescriptor viewsDesc(1, numDims);
+
+ for (unsigned int i = 0; i < numDims; ++i)
+ {
+ viewsDesc.SetViewOriginCoord(0, i, 0);
+ viewsDesc.SetViewSize(0, i, swizzledOutputInfo.GetShape()[i]);
+ }
+
+ if (!IsLayerSupported(__func__,
+ armnn::IsSplitterSupported,
+ m_Compute,
+ swizzledInputInfo,
+ viewsDesc))
+ {
+ return false;
+ }
+
+ startLayer = m_Network->AddSplitterLayer(viewsDesc);
+ }
+
+ armnn::IConnectableLayer* endLayer = ProcessActivation(swizzledOutputInfo, activation, startLayer);
+
+ if (endLayer != nullptr)
+ {
+ armnn::IConnectableLayer& outSwizzleLayer = SwizzleInDeswizzleOut(*m_Network, input, *startLayer, *endLayer);
+ return SetupAndTrackLayerOutputSlot(operation, 0, outSwizzleLayer);
+ }
+ else
+ {
+ return Fail("%s: ProcessActivation failed", operationName);
+ }
+}
+
+const void* ModelToINetworkConverter::GetOperandValueReadOnlyAddress(const Operand& operand) const
+{
+ const void* valueStart = nullptr;
+
+ switch (operand.lifetime)
+ {
+ case OperandLifeTime::CONSTANT_COPY:
+ {
+ // Constant found in model.operandValues
+ valueStart = &m_Model.operandValues[operand.location.offset];
+ break;
+ }
+ case OperandLifeTime::CONSTANT_REFERENCE:
+ {
+ // Constant specified via a Memory object
+ valueStart = GetMemoryFromPool(operand.location, m_MemPools);
+ break;
+ }
+ default:
+ {
+ // Unsupported/invalid (e.g. can't get value of an input to the model)
+ Fail("%s: unsupported/invalid operand lifetime: %s",
+ __func__, toString(operand.lifetime).c_str());
+ valueStart = nullptr;
+ }
+ }
+
+ return valueStart;
+}
+
+const Operand* ModelToINetworkConverter::GetInputOperand(const Operation& operation, uint32_t inputIndex) const
+{
+ if (inputIndex >= operation.inputs.size())
+ {
+ Fail("%s: invalid input index: %i out of %i", __func__, inputIndex, operation.inputs.size());
+ return nullptr;
+ }
+
+ assert(operation.inputs[inputIndex] < m_Model.operands.size()); // Model should have been validated beforehand
+ return &m_Model.operands[operation.inputs[inputIndex]];
+}
+
+const Operand* ModelToINetworkConverter::GetOutputOperand(const Operation& operation, uint32_t outputIndex) const
+{
+ if (outputIndex >= operation.outputs.size())
+ {
+ Fail("%s: invalid output index: %i out of %i", __func__, outputIndex, operation.outputs.size());
+ return nullptr;
+ }
+
+ assert(operation.outputs[outputIndex] < m_Model.operands.size()); // Model should have been validated beforehand
+ return &m_Model.operands[operation.outputs[outputIndex]];
+}
+
+template<typename T>
+bool ModelToINetworkConverter::GetInputScalar(const Operation& operation, uint32_t inputIndex,
+ OperandType type, T& outValue) const
+{
+ const Operand* operand = GetInputOperand(operation, inputIndex);
+ if (!operand)
+ {
+ return Fail("%s: invalid input operand at index %i", __func__, inputIndex);
+ }
+
+ if (operand->type != type)
+ {
+ return Fail("%s: unexpected operand type: %s (should be %s)",
+ __func__, toString(operand->type).c_str(), toString(type).c_str());
+ }
+
+ if (operand->location.length != sizeof(T))
+ {
+ return Fail("%s: incorrect operand location length: %i (should be %i)",
+ __func__, operand->location.length, sizeof(T));
+ }
+
+ const void* valueAddress = GetOperandValueReadOnlyAddress(*operand);
+ if (!valueAddress)
+ {
+ return Fail("%s: failed to get address for operand", __func__);
+ }
+
+ outValue = *(static_cast<const T*>(valueAddress));
+ return true;
+}
+
+bool ModelToINetworkConverter::GetInputInt32(const Operation& operation, uint32_t inputIndex, int32_t& outValue) const
+{
+ return GetInputScalar(operation, inputIndex, OperandType::INT32, outValue);
+}
+
+bool ModelToINetworkConverter::GetInputFloat32(const Operation& operation, uint32_t inputIndex, float& outValue) const
+{
+ return GetInputScalar(operation, inputIndex, OperandType::FLOAT32, outValue);
+}
+
+bool ModelToINetworkConverter::GetInputActivationFunction(const Operation& operation,
+ uint32_t inputIndex,
+ ActivationFn& outActivationFunction) const
+{
+ int32_t activationFunctionAsInt;
+ if (!GetInputInt32(operation, inputIndex, activationFunctionAsInt))
+ {
+ return Fail("%s: failed to get activation input value", __func__);
+ }
+
+ outActivationFunction = static_cast<ActivationFn>(activationFunctionAsInt);
+ return true;
+}
+
+bool ModelToINetworkConverter::GetInputPaddingScheme(const Operation& operation,
+ uint32_t inputIndex,
+ android::nn::PaddingScheme& outPaddingScheme) const
+{
+ int32_t paddingSchemeAsInt;
+ if (!GetInputInt32(operation, inputIndex, paddingSchemeAsInt))
+ {
+ return Fail("%s: failed to get padding scheme input value", __func__);
+ }
+
+ outPaddingScheme = static_cast<android::nn::PaddingScheme>(paddingSchemeAsInt);
+ return true;
+}
+
+LayerInputHandle ModelToINetworkConverter::ConvertToLayerInputHandle(
+ const Operation& operation,
+ uint32_t inputIndex)
+{
+ const Operand* operand = GetInputOperand(operation, inputIndex);
+ if (!operand)
+ {
+ Fail("%s: failed to get input operand %i", __func__, inputIndex);
+ return LayerInputHandle();
+ }
+
+ if (!IsOperandTypeSupportedForTensors(operand->type))
+ {
+ Fail("%s: unsupported operand type for tensor %s", __func__, toString(operand->type).c_str());
+ return LayerInputHandle();
+ }
+
+ armnn::TensorInfo operandTensorInfo = GetTensorInfoForOperand(*operand);
+
+ switch (operand->lifetime)
+ {
+ case OperandLifeTime::TEMPORARY_VARIABLE: // intentional fallthrough
+ case OperandLifeTime::MODEL_INPUT:
+ {
+ // The tensor is either an operand internal to the model, or a model input.
+ // It can be associated with an ArmNN output slot for an existing layer.
+
+ // m_OutputSlotForOperand[...] can be nullptr if the previous layer could not be converted
+ const uint32_t operandIndex = operation.inputs[inputIndex];
+ return LayerInputHandle(true, m_OutputSlotForOperand[operandIndex], operandTensorInfo);
+ break;
+ }
+ case OperandLifeTime::CONSTANT_COPY:
+ case OperandLifeTime::CONSTANT_REFERENCE:
+ {
+ // The tensor has an already known constant value, and can be converted into an ArmNN Constant layer.
+ ConstTensorPin tensorPin = ConvertOperandToConstTensorPin(*operand);
+ if (tensorPin.IsValid())
+ {
+ if (!IsLayerSupported(__func__,
+ armnn::IsConstantSupported,
+ m_Compute,
+ tensorPin.GetConstTensor().GetInfo()))
+ {
+ return LayerInputHandle();
+ }
+
+ armnn::IConnectableLayer* constantLayer = m_Network->AddConstantLayer(tensorPin.GetConstTensor());
+ armnn::IOutputSlot& outputSlot = constantLayer->GetOutputSlot(0);
+ outputSlot.SetTensorInfo(tensorPin.GetConstTensor().GetInfo());
+
+ return LayerInputHandle(true, &outputSlot, operandTensorInfo);
+ }
+ else
+ {
+ Fail("%s: invalid operand tensor", __func__);
+ return LayerInputHandle();
+ }
+ break;
+ }
+ default:
+ {
+ // Unsupported lifetime for an input tensor
+ Fail("%s: unsupported lifetime for input tensor: %s",
+ __func__, toString(operand->lifetime).c_str());
+ return LayerInputHandle();
+ }
+ }
+}
+
+ConstTensorPin ModelToINetworkConverter::ConvertOperationInputToConstTensorPin(const Operation& operation,
+ uint32_t inputIndex, const armnn::PermutationVector& dimensionMappings,
+ const armnn::TensorShape* overrideTensorShape)
+{
+ const Operand* operand = GetInputOperand(operation, inputIndex);
+ if (!operand)
+ {
+ Fail("%s: failed to get input operand", __func__);
+ return ConstTensorPin();
+ }
+
+ return ConvertOperandToConstTensorPin(*operand, dimensionMappings, overrideTensorShape);
+}
+
+ConstTensorPin ModelToINetworkConverter::ConvertOperandToConstTensorPin(const Operand& operand,
+ const armnn::PermutationVector& dimensionMappings, const armnn::TensorShape* overrideTensorShape)
+{
+ if (!IsOperandTypeSupportedForTensors(operand.type))
+ {
+ Fail("%s: unsupported operand type for tensor %s", __func__, toString(operand.type).c_str());
+ return ConstTensorPin();
+ }
+
+ if (operand.lifetime != OperandLifeTime::CONSTANT_COPY && operand.lifetime != OperandLifeTime::CONSTANT_REFERENCE)
+ {
+ Fail("%s: invalid operand lifetime: %s", __func__, toString(operand.lifetime).c_str());
+ return ConstTensorPin();
+ }
+
+ const void* const valueStart = GetOperandValueReadOnlyAddress(operand);
+ if (!valueStart)
+ {
+ Fail("%s: failed to get operand address", __func__);
+ return ConstTensorPin();
+ }
+
+ armnn::TensorInfo tensorInfo = GetTensorInfoForOperand(operand);
+ if (overrideTensorShape != nullptr)
+ {
+ tensorInfo.SetShape(*overrideTensorShape);
+ }
+ return ConstTensorPin(tensorInfo, valueStart, operand.location.length, dimensionMappings);
+}
+
+bool ModelToINetworkConverter::GetTensorInt32Values(const Operand& operand, std::vector<int32_t>& outValues) const
+{
+ if (operand.type != OperandType::TENSOR_INT32)
+ {
+ return Fail("%s: invalid operand type: %s", __func__, toString(operand.type).c_str());
+ }
+
+ const void* startAddress = GetOperandValueReadOnlyAddress(operand);
+ if (!startAddress)
+ {
+ return Fail("%s: failed to get operand address", __func__, operand.type);
+ }
+
+ // Check number of bytes is sensible
+ const uint32_t numBytes = operand.location.length;
+ if (numBytes % sizeof(int32_t) != 0)
+ {
+ return Fail("%s: invalid number of bytes: %i, expected to be a multiple of %i",
+ __func__, numBytes, sizeof(int32_t));
+ }
+
+ outValues.resize(numBytes / sizeof(int32_t));
+ memcpy(outValues.data(), startAddress, numBytes);
+ return true;
+}
+
+// Creates an ArmNN activation layer and connects it to the given layer, if the
+// passed-in AndroidNN activation function requires it.
+// @return The end layer of the sequence of layers built for the given AndroidNN
+// activation function, or nullptr if an error occurred (e.g. unsupported activation).
+// Note that the end layer matches the input layer if no activation is required
+// (the sequence of layers has length 1).
+armnn::IConnectableLayer* ModelToINetworkConverter::ProcessActivation(const armnn::TensorInfo& tensorInfo,
+ ActivationFn activation, armnn::IConnectableLayer* prevLayer)
+{
+ assert(prevLayer->GetNumOutputSlots() == 1);
+
+ prevLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+
+ armnn::IConnectableLayer* activationLayer = prevLayer;
+
+ if (activation != ActivationFn::kActivationNone)
+ {
+ armnn::ActivationDescriptor activationDesc;
+ switch (activation)
+ {
+ case ActivationFn::kActivationRelu:
+ {
+ activationDesc.m_Function = armnn::ActivationFunction::ReLu;
+ break;
+ }
+ case ActivationFn::kActivationRelu1:
+ {
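+ // For BoundedReLu, m_A is the upper bound and m_B the lower bound, giving the [-1, 1] clamp required by RELU1.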
+ activationDesc.m_Function = armnn::ActivationFunction::BoundedReLu;
+ activationDesc.m_A = 1.0f;
+ activationDesc.m_B = -1.0f;
+ break;
+ }
+ case ActivationFn::kActivationRelu6:
+ {
+ activationDesc.m_Function = armnn::ActivationFunction::BoundedReLu;
+ activationDesc.m_A = 6.0f;
+ break;
+ }
+ case ActivationFn::kActivationSigmoid:
+ {
+ activationDesc.m_Function = armnn::ActivationFunction::Sigmoid;
+ break;
+ }
+ case ActivationFn::kActivationTanh:
+ {
+ activationDesc.m_Function = armnn::ActivationFunction::TanH;
+ activationDesc.m_A = 1.0f;
+ activationDesc.m_B = 1.0f;
+ break;
+ }
+ default:
+ {
+ Fail("%s: Invalid activation enum value %i", __func__, activation);
+ return nullptr;
+ }
+ }
+
+ if (!IsLayerSupported(__func__, armnn::IsActivationSupported, m_Compute,
+ prevLayer->GetOutputSlot(0).GetTensorInfo(), activationDesc))
+ {
+ return nullptr;
+ }
+
+ activationLayer = m_Network->AddActivationLayer(activationDesc);
+
+ prevLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
+ activationLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
+ }
+
+ return activationLayer;
+}
+
+bool ModelToINetworkConverter::SetupAndTrackLayerOutputSlot(const Operation& operation, uint32_t outputIndex,
+ armnn::IConnectableLayer& layer)
+{
+ const Operand* outputOperand = GetOutputOperand(operation, outputIndex);
+
+ if ((outputOperand == nullptr) || (outputIndex >= layer.GetNumOutputSlots()))
+ {
+ return false;
+ }
+
+ armnn::IOutputSlot& outputSlot = layer.GetOutputSlot(outputIndex);
+
+ const uint32_t operandIndex = operation.outputs[outputIndex];
+ m_OutputSlotForOperand[operandIndex] = &outputSlot;
+
+ outputSlot.SetTensorInfo(GetTensorInfoForOperand(*outputOperand));
+
+ return true;
+}
+
+bool ModelToINetworkConverter::IsOperationSupported(uint32_t operationIndex) const
+{
+ std::map<uint32_t, bool>::const_iterator it = m_OperationSupported.find(operationIndex);
+ assert(it != m_OperationSupported.end());
+ return it->second;
+}
+
+
+} // armnn_driver
\ No newline at end of file
diff --git a/ModelToINetworkConverter.hpp b/ModelToINetworkConverter.hpp
new file mode 100644
index 00000000..7ced514b
--- /dev/null
+++ b/ModelToINetworkConverter.hpp
@@ -0,0 +1,156 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "HalInterfaces.h"
+#include "NeuralNetworks.h"
+#include "ActivationFunctor.h"
+
+#include <armnn/ArmNN.hpp>
+#include <armnn/INetwork.hpp>
+#include <CpuExecutor.h>
+
+#include "Utils.hpp"
+
+#include <memory>
+#include <vector>
+#include <set>
+
+namespace armnn_driver
+{
+
+class ConstTensorPin;
+class LayerInputHandle;
+
+enum class ConversionResult
+{
+ Success,
+ ErrorMappingPools,
+ UnsupportedFeature
+};
+
+// A helper class performing the conversion from an AndroidNN driver Model representation
+// to an armnn::INetwork object.
+class ModelToINetworkConverter
+{
+public:
+ ModelToINetworkConverter(armnn::Compute compute, const Model& model,
+ const std::set<unsigned int>& forcedUnsupportedOperations);
+
+ ConversionResult GetConversionResult() const { return m_ConversionResult; }
+
+ // Returns the ArmNN INetwork corresponding to the input model if the conversion was successful, nullptr otherwise.
+ armnn::INetwork* GetINetwork() const { return m_Network.get(); }
+
+ bool IsOperationSupported(uint32_t operationIndex) const;
+
+private:
+ void Convert();
+
+ bool ConvertOperation(const Operation& operation);
+
+ bool ConvertAdd(const Operation& operation);
+
+ bool ConvertAveragePool2d(const Operation& operation);
+
+ bool ConvertConcatenation(const Operation& operation);
+
+ bool ConvertConv2d(const Operation& operation);
+
+ bool ConvertDepthwiseConv2d(const Operation& operation);
+
+ bool ConvertFloor(const Operation& operation);
+
+ bool ConvertFullyConnected(const Operation& operation);
+
+ bool ConvertLogistic(const Operation& operation);
+
+ bool ConvertLocalResponseNormalization(const Operation& operation);
+
+ bool ConvertL2Normalization(const Operation& operation);
+
+ bool ConvertL2Pool2d(const Operation& operation);
+
+ bool ConvertMaxPool2d(const Operation& operation);
+
+ bool ConvertMul(const Operation& operation);
+
+ bool ConvertReLu(const Operation& operation);
+
+ bool ConvertReLu1(const Operation& operation);
+
+ bool ConvertReLu6(const Operation& operation);
+
+ bool ConvertSoftmax(const Operation& operation);
+
+ bool ConvertTanH(const Operation& operation);
+
+ bool ConvertReshape(const Operation& operation);
+
+ bool ConvertResizeBilinear(const Operation& operation);
+
+ bool ConvertToActivation(const Operation& operation, const char* operationName,
+ const armnn::ActivationDescriptor& activationDesc);
+
+ bool ConvertPooling2d(const Operation& operation, const char* name, armnn::PoolingAlgorithm poolType);
+
+
+ const void* GetOperandValueReadOnlyAddress(const Operand& operand) const;
+
+ const Operand* GetInputOperand(const Operation& operation, uint32_t inputIndex) const;
+
+ const Operand* GetOutputOperand(const Operation& operation, uint32_t outputIndex) const;
+
+ template<typename T>
+ bool GetInputScalar(const Operation& operation, uint32_t inputIndex, OperandType type, T& outValue) const;
+
+ bool GetInputInt32(const Operation& operation, uint32_t inputIndex, int32_t& outValue) const;
+
+ bool GetInputFloat32(const Operation& operation, uint32_t inputIndex, float& outValue) const;
+
+ bool GetInputActivationFunction(const Operation& operation, uint32_t inputIndex,
+ ActivationFn& outActivationFunction) const;
+
+ bool GetInputPaddingScheme(const Operation& operation, uint32_t inputIndex,
+ android::nn::PaddingScheme& outPaddingScheme) const;
+
+ LayerInputHandle ConvertToLayerInputHandle(const Operation& operation, uint32_t inputIndex);
+
+ ConstTensorPin ConvertOperationInputToConstTensorPin(const Operation& operation, uint32_t inputIndex,
+ const armnn::PermutationVector& dimensionMappings = g_DontPermute,
+ const armnn::TensorShape* overrideTensorShape = nullptr);
+
+ ConstTensorPin ConvertOperandToConstTensorPin(const Operand& operand,
+ const armnn::PermutationVector& dimensionMappings = g_DontPermute,
+ const armnn::TensorShape* overrideTensorShape = nullptr);
+
+ bool GetTensorInt32Values(const Operand& operand, std::vector<int32_t>& outValues) const;
+
+
+ armnn::IConnectableLayer* ProcessActivation(const armnn::TensorInfo& tensorInfo, ActivationFn activation,
+ armnn::IConnectableLayer* prevLayer);
+
+
+ bool SetupAndTrackLayerOutputSlot(const Operation& operation, uint32_t outputIndex,
+ armnn::IConnectableLayer& layer);
+
+
+ // Input data
+ armnn::Compute m_Compute;
+ const Model& m_Model;
+ const std::set<unsigned int>& m_ForcedUnsupportedOperations;
+
+ // Output data
+ armnn::INetworkPtr m_Network;
+ ConversionResult m_ConversionResult;
+ std::map<uint32_t, bool> m_OperationSupported;
+
+ // Working/intermediate data
+ std::vector<armnn::IOutputSlot*> m_OutputSlotForOperand;
+ std::vector<android::nn::RunTimePoolInfo> m_MemPools;
+};
+
+} // armnn_driver
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..f549d2c2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,58 @@
+# ArmNN Android Neural Networks driver
+
+This directory contains the ArmNN driver for the Android Neural Networks API, implementing the android.hardware.neuralnetworks@1.0 HAL.
+
+## Integration guide
+
+### Prerequisites
+
+1. Android source tree for Android O MR1 or later, in the directory `<ANDROID_ROOT>`
+2. Mali OpenCL driver integrated into the Android source tree
+
+### Procedure
+
+1. Place this source directory at `<ANDROID_ROOT>/vendor/arm/android-nn-driver`
+2. Run `setup.sh`
+3. Update the Android build environment to add the ArmNN driver. This ensures that the driver service
+is built and copied to the `system/vendor/bin/hw` directory in the Android image.
+To update the build environment, add the following to the `PRODUCT_PACKAGES` variable
+in the device-specific makefile located in the `<ANDROID_ROOT>/device/<manufacturer>/<product>`
+directory (this file is normally called `device.mk`):
+<pre>
+PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.0-service-armnn
+</pre>
+4. Build Android as normal, i.e. run `make` in `<ANDROID_ROOT>`
+5. To confirm that the ArmNN driver has been built, check for the driver service executable at
+<pre>
+<ANDROID_ROOT>/out/target/product/<product>/system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn
+</pre>
+
+### Testing
+
+1. Run the ArmNN driver service executable in the background
+<pre>
+adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn &
+</pre>
+2. Run some code that exercises the Android Neural Networks API, for example Android's
+`NeuralNetworksTest` unit tests (note that this is an optional component, so it must be built explicitly).
+<pre>
+adb shell /data/nativetest/NeuralNetworksTest/NeuralNetworksTest > NeuralNetworkTest.log
+</pre>
+3. To confirm that the ArmNN driver is being used to service the Android Neural Networks API requests,
+check for messages in logcat with the `ArmnnDriver` tag.
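+For example, one way to filter the log output by that tag is:
+<pre>
+adb logcat -s ArmnnDriver
+</pre>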
+
+### Using ClTuner
+
+ClTuner is a feature of the Compute Library that finds optimum values for OpenCL tuning parameters. The recommended way of using it with ArmNN is to generate the tuning data during development of the Android image for a device, and use it in read-only mode during normal operation:
+
+1. Run the ArmNN driver service executable in tuning mode. The path to the tuning data must be writable by the service:
+<pre>
+adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; --cl-tuned-parameters-mode UpdateTunedParameters &
+</pre>
+2. Run a representative set of Android NNAPI testing loads. In this mode of operation, each NNAPI workload will be slow the first time it is executed, as the tuning parameters are being selected. Subsequent executions will use the tuning data which has been generated.
+3. Stop the service.
+4. Deploy the tuned parameters file to a location readable by the ArmNN driver service (for example, to a location within /vendor/etc).
+5. During normal operation, pass the location of the tuning data to the driver service (this would normally be done by passing arguments via Android init in the service .rc definition):
+<pre>
+adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; &
+</pre>
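+
+For reference, a minimal sketch of what the service .rc definition from step 5 might look like, assuming the
+service definition shipped with this driver and a tuned parameters file deployed to a path of your choosing:
+<pre>
+service neuralnetworks_hal_service_armnn /vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt;
+    class hal
+    user system
+    group system
+</pre>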
diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt
new file mode 100644
index 00000000..89f7e761
--- /dev/null
+++ b/ReleaseNotes.txt
@@ -0,0 +1,59 @@
+------ ArmNN for Android 18.02 Release Notes ------
+
+This release of ArmNN for Android supports use as a driver for the Android Neural Networks API. It implements the android.hardware.neuralnetworks@1.0 interface.
+
+For more information on the Android Neural Networks API, see https://developer.android.com/ndk/guides/neuralnetworks/index.html
+
+For integration and usage documentation, please see README.md.
+
+--- Support for Android Neural Networks HAL operations ---
+
+The following AndroidNN operations are currently supported.
+
+AndroidNN operator Tensor type supported
+ADD (FLOAT32)
+AVERAGE_POOL_2D (FLOAT32,QUANT8_ASYMM)
+CONCATENATION (FLOAT32)
+CONV_2D (FLOAT32,QUANT8_ASYMM**)
+DEPTHWISE_CONV_2D*** (FLOAT32,QUANT8_ASYMM)
+FLOOR (FLOAT32)
+FULLY_CONNECTED (FLOAT32)
+L2_NORMALIZATION (FLOAT32)
+L2_POOL_2D (FLOAT32)
+LOCAL_RESPONSE_NORMALIZATION (FLOAT32)
+LOGISTIC (FLOAT32,QUANT8_ASYMM)
+MAX_POOL_2D (FLOAT32,QUANT8_ASYMM)
+MUL* (FLOAT32)
+RELU (FLOAT32,QUANT8_ASYMM)
+RELU1 (FLOAT32,QUANT8_ASYMM)
+RELU6 (FLOAT32,QUANT8_ASYMM)
+RESHAPE (FLOAT32,QUANT8_ASYMM)
+RESIZE_BILINEAR (FLOAT32)
+SOFTMAX (FLOAT32,QUANT8_ASYMM)
+TANH (FLOAT32)
+
+* MUL does not currently support inputs of different tensor sizes.
+
+** QUANT8_ASYMM version does not support asymmetric padding. In addition, only the following configurations are supported:
+ 1) 1x1 convolution with strides of 1 or 2 or 3
+ 2) 3x3 convolution with strides of 1 or 2
+ 3) 5x5 convolution with strides of 1 or 2
+
+*** Depthwise convolution only supports a value of 1 for the depth multiplier. In addition, the QUANT8_ASYMM version only supports 3x3 kernels.
+
+
+--- Unsupported operators ---
+
+The following AndroidNN operations are currently not supported.
+
+DEPTH_TO_SPACE
+DEQUANTIZE
+EMBEDDING_LOOKUP
+HASHTABLE_LOOKUP
+LSH_PROJECTION
+LSTM
+RNN
+SPACE_TO_DEPTH
+SVDF
+
+Where operations are not supported by the ArmNN Android NN Driver, the driver indicates this to the framework, which then falls back to its own CPU implementation for those operations.
diff --git a/RequestThread.cpp b/RequestThread.cpp
new file mode 100644
index 00000000..708a46c8
--- /dev/null
+++ b/RequestThread.cpp
@@ -0,0 +1,116 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#define LOG_TAG "ArmnnDriver"
+
+#include "RequestThread.hpp"
+#include "ArmnnPreparedModel.hpp"
+
+#include <log/log.h>
+
+#include <boost/assert.hpp>
+
+using namespace android;
+
+namespace armnn_driver
+{
+
+RequestThread::RequestThread()
+{
+ ALOGV("RequestThread::RequestThread()");
+ m_Thread = std::make_unique<std::thread>(&RequestThread::Process, this);
+}
+
+RequestThread::~RequestThread()
+{
+ ALOGV("RequestThread::~RequestThread()");
+ // post an EXIT message to the thread
+ std::shared_ptr<AsyncExecuteData> nulldata(nullptr);
+ auto pMsg = std::make_shared<ThreadMsg>(ThreadMsgType::EXIT, nulldata);
+ PostMsg(pMsg);
+ // Wait for the thread to terminate; it is deleted automatically
+ m_Thread->join();
+}
+
+void RequestThread::PostMsg(ArmnnPreparedModel* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ const ::android::sp<IExecutionCallback>& callback)
+{
+ ALOGV("RequestThread::PostMsg(...)");
+ auto data = std::make_shared<AsyncExecuteData>(model,
+ memPools,
+ inputTensors,
+ outputTensors,
+ callback);
+ auto pMsg = std::make_shared<ThreadMsg>(ThreadMsgType::REQUEST, data);
+ PostMsg(pMsg);
+}
+
+void RequestThread::PostMsg(std::shared_ptr<ThreadMsg>& pMsg)
+{
+ ALOGV("RequestThread::PostMsg(pMsg)");
+ // Add a message to the queue and notify the request thread
+ std::unique_lock<std::mutex> lock(m_Mutex);
+ m_Queue.push(pMsg);
+ m_Cv.notify_one();
+}
+
+void RequestThread::Process()
+{
+ ALOGV("RequestThread::Process()");
+ while (true)
+ {
+ std::shared_ptr<ThreadMsg> pMsg(nullptr);
+ {
+ // Wait for a message to be added to the queue
+ // This is in a separate scope to minimise the lifetime of the lock
+ std::unique_lock<std::mutex> lock(m_Mutex);
+ while (m_Queue.empty())
+ {
+ m_Cv.wait(lock);
+ }
+ // get the message to process from the front of the queue
+ pMsg = m_Queue.front();
+ m_Queue.pop();
+ }
+
+ switch (pMsg->type)
+ {
+ case ThreadMsgType::REQUEST:
+ {
+ ALOGV("RequestThread::Process() - request");
+ // invoke the asynchronous execution method
+ ArmnnPreparedModel* model = pMsg->data->m_Model;
+ model->ExecuteGraph(pMsg->data->m_MemPools,
+ pMsg->data->m_InputTensors,
+ pMsg->data->m_OutputTensors,
+ pMsg->data->m_callback);
+ break;
+ }
+
+ case ThreadMsgType::EXIT:
+ {
+ ALOGV("RequestThread::Process() - exit");
+ // delete all remaining messages (there should not be any)
+ std::unique_lock<std::mutex> lock(m_Mutex);
+ while (!m_Queue.empty())
+ {
+ m_Queue.pop();
+ }
+ return;
+ }
+
+ default:
+ // this should be unreachable
+ ALOGE("RequestThread::Process() - invalid message type");
+ BOOST_ASSERT_MSG(false, "ArmNN: RequestThread: invalid message type");
+ }
+ }
+}
+
+} // namespace armnn_driver
+
diff --git a/RequestThread.hpp b/RequestThread.hpp
new file mode 100644
index 00000000..0983793e
--- /dev/null
+++ b/RequestThread.hpp
@@ -0,0 +1,104 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <queue>
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+
+#include "CpuExecutor.h"
+#include "HalInterfaces.h"
+#include <armnn/ArmNN.hpp>
+
+namespace armnn_driver
+{
+
+class ArmnnPreparedModel;
+
+class RequestThread
+{
+public:
+ /// Constructor creates the thread
+ RequestThread();
+
+ /// Destructor terminates the thread
+ ~RequestThread();
+
+ /// Add a message to the thread queue.
+ /// @param[in] model pointer to the prepared model handling the request
+ /// @param[in] memPools pointer to the memory pools vector for the tensors
+ /// @param[in] inputTensors pointer to the input tensors for the request
+ /// @param[in] outputTensors pointer to the output tensors for the request
+ /// @param[in] callback the android notification callback
+ void PostMsg(armnn_driver::ArmnnPreparedModel* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ const ::android::sp<IExecutionCallback>& callback);
+
+private:
+ RequestThread(const RequestThread&) = delete;
+ RequestThread& operator=(const RequestThread&) = delete;
+
+ /// storage for a prepared model and args for the asyncExecute call
+ struct AsyncExecuteData
+ {
+ AsyncExecuteData(ArmnnPreparedModel* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ const ::android::sp<IExecutionCallback>& cb)
+ : m_Model(model)
+ , m_MemPools(memPools)
+ , m_InputTensors(inputTensors)
+ , m_OutputTensors(outputTensors)
+ , m_callback(cb)
+ {
+ }
+
+ armnn_driver::ArmnnPreparedModel* m_Model;
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
+ std::shared_ptr<armnn::InputTensors> m_InputTensors;
+ std::shared_ptr<armnn::OutputTensors> m_OutputTensors;
+ const ::android::sp<IExecutionCallback> m_callback;
+ };
+
+ enum class ThreadMsgType
+ {
+ EXIT, // exit the thread
+ REQUEST // user request to process
+ };
+
+ /// storage for the thread message type and data
+ struct ThreadMsg
+ {
+ ThreadMsg(ThreadMsgType msgType,
+ std::shared_ptr<AsyncExecuteData>& msgData)
+ : type(msgType)
+ , data(msgData)
+ {
+ }
+
+ ThreadMsgType type;
+ std::shared_ptr<AsyncExecuteData> data;
+ };
+
+ /// Add a prepared thread message to the thread queue.
+ /// @param[in] threadMsg the message to add to the queue
+ void PostMsg(std::shared_ptr<ThreadMsg>& pThreadMsg);
+
+ /// Entry point for the request thread
+ void Process();
+
+ std::unique_ptr<std::thread> m_Thread;
+ std::queue<std::shared_ptr<ThreadMsg>> m_Queue;
+ std::mutex m_Mutex;
+ std::condition_variable m_Cv;
+};
+
+} // namespace armnn_driver
+
diff --git a/SystemPropertiesUtils.hpp b/SystemPropertiesUtils.hpp
new file mode 100644
index 00000000..57aa98ca
--- /dev/null
+++ b/SystemPropertiesUtils.hpp
@@ -0,0 +1,83 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <stdio.h>
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <sys/system_properties.h>
+#include <log/log.h>
+
+namespace {
+template<typename T>
+struct ConvStringTo;
+
+template<>
+struct ConvStringTo<float>
+{
+ static float Func(std::string s) { return std::stof(s); }
+};
+
+template<>
+struct ConvStringTo<int>
+{
+ static int Func(std::string s) { return std::stoi(s); }
+};
+
+template<>
+struct ConvStringTo<bool>
+{
+ static bool Func(std::string s) { return !!std::stoi(s); }
+};
+
+template<typename T>
+void GetCapabilitiesProperties([[maybe_unused]]void* cookie,
+ [[maybe_unused]]const char *name,
+ [[maybe_unused]]const char *value,
+ [[maybe_unused]]uint32_t serial)
+{
+ T &prop = *reinterpret_cast<T*>(cookie);
+ prop = ConvStringTo<T>::Func(std::string(value));
+}
+
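+// Reads the named Android system property and converts its value to T, returning defaultValue if the
+// property does not exist or cannot be parsed.
+// Example usage (hypothetical property name): bool dump = ParseSystemProperty("armnn.example.dump", false);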
+template<typename T>
+T ParseSystemProperty(const char* name, T defaultValue)
+{
+ try
+ {
+ const prop_info *pInfo = __system_property_find(name);
+ if (!pInfo)
+ {
+ ALOGW("ArmnnDriver::ParseSystemProperty(): Could not find property [%s].", name);
+ }
+ else
+ {
+ T property;
+ __system_property_read_callback(pInfo, &GetCapabilitiesProperties<T>, &property);
+ std::stringstream messageBuilder;
+ messageBuilder << "ArmnnDriver::ParseSystemProperty(): Setting [" << name << "]=[" << property << "].";
+ ALOGD("%s", messageBuilder.str().c_str());
+ return property;
+ }
+ }
+ catch(const std::invalid_argument& e)
+ {
+ ALOGD("ArmnnDriver::ParseSystemProperty(): Property [%s] has invalid data type.", name);
+ }
+ catch(const std::out_of_range& e)
+ {
+ ALOGD("ArmnnDriver::ParseSystemProperty(): Property [%s] out of range for the data type.", name);
+ }
+ catch (...)
+ {
+ ALOGD("ArmnnDriver::ParseSystemProperty(): Unexpected exception reading system "
+ "property [%s].", name);
+ }
+
+ std::stringstream messageBuilder;
+ messageBuilder << "ArmnnDriver::ParseSystemProperty(): Falling back to default value [" << defaultValue << "]";
+ ALOGD("%s", messageBuilder.str().c_str());
+ return defaultValue;
+}
+} //namespace
\ No newline at end of file
diff --git a/Utils.cpp b/Utils.cpp
new file mode 100644
index 00000000..33c1cd3c
--- /dev/null
+++ b/Utils.cpp
@@ -0,0 +1,273 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#define LOG_TAG "ArmnnDriver"
+
+#include "Utils.hpp"
+
+#include <Permute.hpp>
+
+#include <boost/format.hpp>
+#include <log/log.h>
+
+#include <cassert>
+#include <cinttypes>
+#include <fstream>
+
+using namespace android;
+using namespace android::hidl::memory::V1_0;
+
+namespace armnn_driver
+{
+const armnn::PermutationVector g_DontPermute{};
+
+namespace
+{
+
+template <typename T>
+void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorShape& inTensorShape, const void* input,
+ void* output, const armnn::PermutationVector& mappings)
+{
+ const auto inputData = static_cast<const T*>(input);
+ const auto outputData = static_cast<T*>(output);
+
+ armnnUtils::Permute(armnnUtils::Permuted(inTensorShape, mappings), mappings, inputData, outputData);
+}
+
+} // anonymous namespace
+
+void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void* input, void* output,
+ const armnn::PermutationVector& mappings)
+{
+ assert(tensor.GetNumDimensions() == 4U);
+
+ switch(tensor.GetDataType())
+ {
+ case armnn::DataType::Float32:
+ SwizzleAndroidNn4dTensorToArmNn<float>(tensor.GetShape(), input, output, mappings);
+ break;
+ case armnn::DataType::QuantisedAsymm8:
+ SwizzleAndroidNn4dTensorToArmNn<uint8_t>(tensor.GetShape(), input, output, mappings);
+ break;
+ default:
+ ALOGW("Unknown armnn::DataType for swizzling");
+ assert(0);
+ }
+}
+
+void* GetMemoryFromPool(DataLocation location, const std::vector<android::nn::RunTimePoolInfo>& memPools)
+{
+ // find the location within the pool
+ assert(location.poolIndex < memPools.size());
+
+ uint8_t* memory =
+ static_cast<uint8_t*>(static_cast<void*>(memPools[location.poolIndex].buffer)) + location.offset;
+
+ return memory;
+}
+
+armnn::TensorInfo GetTensorInfoForOperand(const Operand& operand)
+{
+ armnn::DataType type;
+
+ switch (operand.type)
+ {
+ case OperandType::TENSOR_FLOAT32:
+ type = armnn::DataType::Float32;
+ break;
+ case OperandType::TENSOR_QUANT8_ASYMM:
+ type = armnn::DataType::QuantisedAsymm8;
+ break;
+ case OperandType::TENSOR_INT32:
+ type = armnn::DataType::Signed32;
+ break;
+ default:
+ throw UnsupportedOperand(operand.type);
+ }
+
+ armnn::TensorInfo ret(operand.dimensions.size(), operand.dimensions.data(), type);
+
+ ret.SetQuantizationScale(operand.scale);
+ ret.SetQuantizationOffset(operand.zeroPoint);
+
+ return ret;
+}
+
+std::string GetOperandSummary(const Operand& operand)
+{
+ return android::hardware::details::arrayToString(operand.dimensions, operand.dimensions.size()) + " " +
+ toString(operand.type);
+}
+
+std::string GetModelSummary(const Model& model)
+{
+ std::stringstream result;
+
+ result << model.inputIndexes.size() << " input(s), " << model.operations.size() << " operation(s), " <<
+ model.outputIndexes.size() << " output(s), " << model.operands.size() << " operand(s)" << std::endl;
+
+ result << "Inputs: ";
+ for (uint32_t i = 0; i < model.inputIndexes.size(); i++)
+ {
+ result << GetOperandSummary(model.operands[model.inputIndexes[i]]) << ", ";
+ }
+ result << std::endl;
+
+ result << "Operations: ";
+ for (uint32_t i = 0; i < model.operations.size(); i++)
+ {
+ result << toString(model.operations[i].type).c_str() << ", ";
+ }
+ result << std::endl;
+
+ result << "Outputs: ";
+ for (uint32_t i = 0; i < model.outputIndexes.size(); i++)
+ {
+ result << GetOperandSummary(model.operands[model.outputIndexes[i]]) << ", ";
+ }
+ result << std::endl;
+
+ return result.str();
+}
+
+using DumpElementFunction = void (*)(const armnn::ConstTensor& tensor,
+ unsigned int elementIndex,
+ std::ofstream& fileStream);
+
+namespace
+{
+template <typename ElementType, typename PrintableType = ElementType>
+void DumpTensorElement(const armnn::ConstTensor& tensor, unsigned int elementIndex, std::ofstream& fileStream)
+{
+ const ElementType* elements = reinterpret_cast<const ElementType*>(tensor.GetMemoryArea());
+ fileStream << static_cast<PrintableType>(elements[elementIndex]) << ",";
+}
+
+constexpr const char* MemoryLayoutString(const armnn::ConstTensor& tensor)
+{
+ const char* str = "";
+
+ switch (tensor.GetNumDimensions())
+ {
+ case 4: { str = "(BHWC) "; break; }
+ case 3: { str = "(HWC) "; break; }
+ case 2: { str = "(HW) "; break; }
+ default: { str = ""; break; }
+ }
+
+ return str;
+}
+} // namespace
+
+void DumpTensor(const std::string& dumpDir,
+ const std::string& requestName,
+ const std::string& tensorName,
+ const armnn::ConstTensor& tensor)
+{
+ // The dump directory must exist in advance.
+ const std::string fileName = boost::str(boost::format("%1%/%2%_%3%.dump") % dumpDir % requestName % tensorName);
+
+ std::ofstream fileStream;
+ fileStream.open(fileName, std::ofstream::out | std::ofstream::trunc);
+
+ if (!fileStream.good())
+ {
+ ALOGW("Could not open file %s for writing", fileName.c_str());
+ return;
+ }
+
+ DumpElementFunction dumpElementFunction = nullptr;
+
+ switch (tensor.GetDataType())
+ {
+ case armnn::DataType::Float32:
+ {
+ dumpElementFunction = &DumpTensorElement<float>;
+ break;
+ }
+ case armnn::DataType::QuantisedAsymm8:
+ {
+ dumpElementFunction = &DumpTensorElement<uint8_t, uint32_t>;
+ break;
+ }
+ case armnn::DataType::Signed32:
+ {
+ dumpElementFunction = &DumpTensorElement<int32_t>;
+ break;
+ }
+ default:
+ {
+ dumpElementFunction = nullptr;
+ }
+ }
+
+ if (dumpElementFunction != nullptr)
+ {
+ const unsigned int numDimensions = tensor.GetNumDimensions();
+
+ const unsigned int batch = (numDimensions == 4) ? tensor.GetShape()[numDimensions - 4] : 1;
+
+ const unsigned int height = (numDimensions >= 3)
+ ? tensor.GetShape()[numDimensions - 3]
+ : (numDimensions >= 2) ? tensor.GetShape()[numDimensions - 2] : 1;
+
+ const unsigned int width = (numDimensions >= 3)
+ ? tensor.GetShape()[numDimensions - 2]
+ : (numDimensions >= 1) ? tensor.GetShape()[numDimensions - 1] : 0;
+
+ const unsigned int channels = (numDimensions >= 3) ? tensor.GetShape()[numDimensions - 1] : 1;
+
+ fileStream << "# Number of elements " << tensor.GetNumElements() << std::endl;
+ fileStream << "# Dimensions " << MemoryLayoutString(tensor);
+ fileStream << "[" << tensor.GetShape()[0];
+ for (unsigned int d = 1; d < numDimensions; d++)
+ {
+ fileStream << "," << tensor.GetShape()[d];
+ }
+ fileStream << "]" << std::endl;
+
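+ // Tensor data is assumed to be laid out in (B)HWC order. Elements are dumped one channel at a time
+ // for each batch, with 'e' tracking the linear index of the current element within the tensor data.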
+ for (unsigned int e = 0, b = 0; b < batch; ++b)
+ {
+ if (numDimensions >= 4)
+ {
+ fileStream << "# Batch " << b << std::endl;
+ }
+ for (unsigned int c = 0; c < channels; c++)
+ {
+ if (numDimensions >= 3)
+ {
+ fileStream << "# Channel " << c << std::endl;
+ }
+ for (unsigned int h = 0; h < height; h++)
+ {
+ for (unsigned int w = 0; w < width; w++, e += channels)
+ {
+ (*dumpElementFunction)(tensor, e, fileStream);
+ }
+ fileStream << std::endl;
+ }
+ e -= channels - 1;
+ if (c < channels - 1)
+ {
+ e -= ((height * width) - 1) * channels;
+ }
+ }
+ fileStream << std::endl;
+ }
+ fileStream << std::endl;
+ }
+ else
+ {
+ fileStream << "Cannot dump tensor elements: Unsupported data type "
+ << static_cast<unsigned int>(tensor.GetDataType()) << std::endl;
+ }
+
+ if (!fileStream.good())
+ {
+ ALOGW("An error occurred when writing to file %s", fileName.c_str());
+ }
+}
+
+} // namespace armnn_driver
diff --git a/Utils.hpp b/Utils.hpp
new file mode 100644
index 00000000..49b8b8d8
--- /dev/null
+++ b/Utils.hpp
@@ -0,0 +1,51 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "HalInterfaces.h"
+#include "NeuralNetworks.h"
+#include <armnn/ArmNN.hpp>
+#include <CpuExecutor.h>
+
+#include <vector>
+#include <string>
+
+namespace armnn_driver
+{
+
+extern const armnn::PermutationVector g_DontPermute;
+
+class UnsupportedOperand: public std::runtime_error
+{
+public:
+ UnsupportedOperand(const OperandType type)
+ : std::runtime_error("Operand type is unsupported")
+ , m_type(type)
+ {}
+
+ OperandType m_type;
+};
+
+/// Swizzles tensor data in @a input according to the dimension mappings.
+void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void* input, void* output,
+ const armnn::PermutationVector& mappings);
+
+/// Returns a pointer to a specific location in a pool
+void* GetMemoryFromPool(DataLocation location,
+ const std::vector<android::nn::RunTimePoolInfo>& memPools);
+
+/// Can throw UnsupportedOperand
+armnn::TensorInfo GetTensorInfoForOperand(const Operand& operand);
+
+std::string GetOperandSummary(const Operand& operand);
+std::string GetModelSummary(const Model& model);
+
+void DumpTensor(const std::string& dumpDir,
+ const std::string& requestName,
+ const std::string& tensorName,
+ const armnn::ConstTensor& tensor);
+
+}
diff --git a/android.hardware.neuralnetworks@1.0-service-armnn.rc b/android.hardware.neuralnetworks@1.0-service-armnn.rc
new file mode 100644
index 00000000..d7200861
--- /dev/null
+++ b/android.hardware.neuralnetworks@1.0-service-armnn.rc
@@ -0,0 +1,4 @@
+service neuralnetworks_hal_service_armnn /vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn
+ class hal
+ user system
+ group system
diff --git a/service.cpp b/service.cpp
new file mode 100644
index 00000000..742091ef
--- /dev/null
+++ b/service.cpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#define LOG_TAG "ArmnnDriver"
+
+#include "ArmnnDriver.hpp"
+
+#include <hidl/LegacySupport.h>
+#include <log/log.h>
+
+#include <string>
+#include <vector>
+
+using namespace armnn_driver;
+using namespace std;
+
+int main(int argc, char** argv)
+{
+ android::sp<ArmnnDriver> driver = new ArmnnDriver(DriverOptions(argc, argv));
+
+ android::hardware::configureRpcThreadpool(1, true);
+ if (driver->registerAsService("armnn") != android::OK)
+ {
+ ALOGE("Could not register service");
+ return 1;
+ }
+ android::hardware::joinRpcThreadpool();
+ ALOGE("Service exited!");
+ return 1;
+}
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 00000000..91a75ec8
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+function AssertZeroExitCode {
+ EXITCODE=$?
+ if [ $EXITCODE -ne 0 ]; then
+ echo "$1"
+ echo "+++ Command exited with code $EXITCODE. Please fix the above errors and re-run"
+ exit 1
+ fi
+}
+
+if [ ! -d boost_1_64_0 ]; then
+ echo "++ Downloading Boost"
+
+ BOOST_PKG=boost_1_64_0.tar.gz
+
+ curl -LOk https://dl.bintray.com/boostorg/release/1.64.0/source/boost_1_64_0.tar.gz
+ AssertZeroExitCode "Downloading Boost failed"
+
+ tar xzf $BOOST_PKG
+ AssertZeroExitCode "Unpacking Boost failed"
+
+ rm -rf $BOOST_PKG
+fi
+
+if [ ! -d armnn ]; then
+ echo "++ Downloading armnn"
+
+ git clone git@github.com:ARM-software/armnn.git armnn
+ AssertZeroExitCode "Cloning armnn failed"
+fi
+
+if [ ! -d clframework ]; then
+ echo "++ Downloading clframework"
+
+ git clone git@github.com:ARM-software/ComputeLibrary.git clframework
+ AssertZeroExitCode "Cloning clframework failed"
+fi
+
+# Get scons to create the generated source code which clframework needs to compile.
+# This is required for the Android build system to build clframework (see below)
+pushd clframework
+scons os=android build=embed_only neon=0 opencl=1 embed_kernels=1 validation_tests=0 \
+ arch=arm64-v8a build_dir=android-arm64v8a benchmark_tests=0 -j16 \
+ build/android-arm64v8a/src/core/arm_compute_version.embed build/android-arm64v8a/src/core/CL/cl_kernels
+AssertZeroExitCode "Precompiling clframework failed"
+popd
+
diff --git a/test/Android.mk b/test/Android.mk
new file mode 100644
index 00000000..7a718afa
--- /dev/null
+++ b/test/Android.mk
@@ -0,0 +1,68 @@
+#
+# Copyright © 2017 ARM Ltd. All rights reserved.
+# See LICENSE file in the project root for full license information.
+#
+
+LOCAL_PATH := $(call my-dir)
+
+# Configure these paths if you move the source or Khronos headers
+#
+OPENCL_HEADER_PATH := $(LOCAL_PATH)/../../mali/product/khronos/original
+NN_HEADER_PATH := $(LOCAL_PATH)/../../../../frameworks/ml/nn/runtime/include
+ARMNN_HEADER_PATH := $(LOCAL_PATH)/../armnn/include
+ARMNN_DRIVER_HEADER_PATH := $(LOCAL_PATH)/..
+
+include $(CLEAR_VARS)
+
+LOCAL_C_INCLUDES := \
+ $(OPENCL_HEADER_PATH) \
+ $(NN_HEADER_PATH) \
+ $(ARMNN_HEADER_PATH) \
+ $(ARMNN_DRIVER_HEADER_PATH)
+
+LOCAL_CFLAGS := \
+ -std=c++14 \
+ -fexceptions \
+ -Werror \
+ -UNDEBUG
+
+LOCAL_SRC_FILES := \
+ Tests.cpp
+
+LOCAL_STATIC_LIBRARIES := \
+ libarmnn-driver \
+ libneuralnetworks_common \
+ libarmnn \
+ libboost_log \
+ libboost_system \
+ libboost_unit_test_framework \
+ libboost_thread \
+ armnn-arm_compute
+
+LOCAL_SHARED_LIBRARIES := \
+ libbase \
+ libhidlbase \
+ libhidltransport \
+ libhidlmemory \
+ libtextclassifier \
+ libtextclassifier_hash \
+ liblog \
+ libutils \
+ android.hardware.neuralnetworks@1.0 \
+ android.hidl.allocator@1.0 \
+ android.hidl.memory@1.0 \
+ libOpenCL
+
+LOCAL_MODULE := armnn-driver-tests
+
+LOCAL_MODULE_TAGS := eng optional
+
+LOCAL_ARM_MODE := arm
+
+# Mark source files as dependent on Android.mk
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+
+include $(BUILD_EXECUTABLE)
+
+
+
diff --git a/test/Tests.cpp b/test/Tests.cpp
new file mode 100755
index 00000000..5f3dd6f6
--- /dev/null
+++ b/test/Tests.cpp
@@ -0,0 +1,978 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#define LOG_TAG "ArmnnDriverTests"
+#define BOOST_TEST_MODULE armnn_driver_tests
+#include <boost/test/unit_test.hpp>
+#include <log/log.h>
+
+#include "../ArmnnDriver.hpp"
+#include "../SystemPropertiesUtils.hpp"
+
+#include "OperationsUtils.h"
+
+#include <condition_variable>
+
+namespace android
+{
+namespace hardware
+{
+namespace neuralnetworks
+{
+namespace V1_0
+{
+
+std::ostream& operator<<(std::ostream& os, ErrorStatus stat)
+{
+ return os << static_cast<int>(stat);
+}
+
+}
+}
+}
+}
+
+BOOST_AUTO_TEST_SUITE(DriverTests)
+
+using namespace armnn_driver;
+using namespace android::nn;
+using namespace android;
+
+BOOST_AUTO_TEST_CASE(Init)
+{
+ // Making the driver object on the stack causes a weird libc error, so make it on the heap instead
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+ DeviceStatus status = driver->getStatus();
+ // Note double-parentheses to avoid compile error from Boost trying to printf the DeviceStatus
+ BOOST_TEST((status == DeviceStatus::AVAILABLE));
+}
+
+BOOST_AUTO_TEST_CASE(TestCapabilities)
+{
+ // Making the driver object on the stack causes a weird libc error, so make it on the heap instead
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+ ErrorStatus error;
+ Capabilities cap;
+
+ ArmnnDriver::getCapabilities_cb cb = [&](ErrorStatus status, const Capabilities& capabilities)
+ {
+ error = status;
+ cap = capabilities;
+ };
+
+ driver->getCapabilities(cb);
+
+ BOOST_TEST((int)error == (int)ErrorStatus::NONE);
+ BOOST_TEST(cap.float32Performance.execTime > 0.f);
+ BOOST_TEST(cap.float32Performance.powerUsage > 0.f);
+ BOOST_TEST(cap.quantized8Performance.execTime > 0.f);
+ BOOST_TEST(cap.quantized8Performance.powerUsage > 0.f);
+}
+
+BOOST_AUTO_TEST_CASE(SystemProperties)
+{
+ // Test default value
+ {
+ auto p = __system_property_find("thisDoesNotExist");
+ BOOST_TEST((p == nullptr));
+
+ int defaultValue = ParseSystemProperty("thisDoesNotExist", -4);
+ BOOST_TEST((defaultValue == -4));
+ }
+
+ // Test default value from bad data type
+ {
+ __system_property_set("thisIsNotFloat", "notfloat");
+ float defaultValue = ParseSystemProperty("thisIsNotFloat", 0.1f);
+ BOOST_TEST((defaultValue == 0.1f));
+ }
+
+ // Test fetching bool values
+ {
+ __system_property_set("myTestBool", "1");
+ bool b = ParseSystemProperty("myTestBool", false);
+ BOOST_TEST((b == true));
+ }
+ {
+ __system_property_set("myTestBool", "0");
+ bool b = ParseSystemProperty("myTestBool", true);
+ BOOST_TEST((b == false));
+ }
+
+ // Test fetching int
+ {
+ __system_property_set("myTestInt", "567");
+ int i = ParseSystemProperty("myTestInt", 890);
+ BOOST_TEST((i==567));
+ }
+
+ // Test fetching float
+ {
+ __system_property_set("myTestFloat", "1.2f");
+ float f = ParseSystemProperty("myTestFloat", 3.4f);
+ BOOST_TEST((f==1.2f));
+ }
+}
+
+// The following are helpers for writing unit tests for the driver
+namespace
+{
+
+struct ExecutionCallback : public IExecutionCallback
+{
+ ExecutionCallback()
+ : mNotified(false)
+ {
+ }
+
+ Return<void> notify(ErrorStatus status) override
+ {
+ (void)status;
+ ALOGI("ExecutionCallback::notify invoked");
+ std::lock_guard<std::mutex> executionLock(mMutex);
+ mNotified = true;
+ mCondition.notify_one();
+ return Void();
+ }
+
+ /// wait until the callback has notified us that it is done
+ Return<void> wait()
+ {
+ ALOGI("ExecutionCallback::wait invoked");
+ std::unique_lock<std::mutex> executionLock(mMutex);
+ while (!mNotified)
+ {
+ mCondition.wait(executionLock);
+ }
+ mNotified = false;
+ return Void();
+ }
+
+private:
+ // use a mutex and a condition variable to wait for asynchronous callbacks
+ std::mutex mMutex;
+ std::condition_variable mCondition;
+ // and a flag, in case we are notified before the wait call
+ bool mNotified;
+};
+
+class PreparedModelCallback : public IPreparedModelCallback
+{
+public:
+ PreparedModelCallback()
+ {
+ }
+
+ ~PreparedModelCallback() override
+ {
+ }
+
+ Return<void> notify(ErrorStatus status, const sp<IPreparedModel>& preparedModel) override
+ {
+ m_ErrorStatus = status;
+ m_PreparedModel = preparedModel;
+ return Void();
+ }
+
+ ErrorStatus GetErrorStatus()
+ {
+ return m_ErrorStatus;
+ }
+
+ sp<IPreparedModel> GetPreparedModel()
+ {
+ return m_PreparedModel;
+ }
+
+
+private:
+ ErrorStatus m_ErrorStatus;
+ sp<IPreparedModel> m_PreparedModel;
+};
+
+
+
+// lifted from common/Utils.cpp
+hidl_memory allocateSharedMemory(int64_t size)
+{
+ hidl_memory memory;
+
+ const std::string& type = "ashmem";
+ android::sp<IAllocator> allocator = IAllocator::getService(type);
+ allocator->allocate(size, [&](bool success, const hidl_memory& mem) {
+ if (!success)
+ {
+ ALOGE("unable to allocate %li bytes of %s", size, type.c_str());
+ }
+ else
+ {
+ memory = mem;
+ }
+ });
+
+ return memory;
+}
+
+
+android::sp<IMemory> AddPoolAndGetData(uint32_t size, Request& request)
+{
+ hidl_memory pool;
+
+ android::sp<IAllocator> allocator = IAllocator::getService("ashmem");
+ allocator->allocate(sizeof(float) * size, [&](bool success, const hidl_memory& mem) {
+ BOOST_TEST(success);
+ pool = mem;
+ });
+
+ request.pools.resize(request.pools.size() + 1);
+ request.pools[request.pools.size() - 1] = pool;
+
+ android::sp<IMemory> mapped = mapMemory(pool);
+ mapped->update();
+ return mapped;
+}
+
+void AddPoolAndSetData(uint32_t size, Request& request, float* data)
+{
+ android::sp<IMemory> memory = AddPoolAndGetData(size, request);
+
+ float* dst = static_cast<float*>(static_cast<void*>(memory->getPointer()));
+
+ memcpy(dst, data, size * sizeof(float));
+}
+
+void AddOperand(Model& model, const Operand& op)
+{
+ model.operands.resize(model.operands.size() + 1);
+ model.operands[model.operands.size() - 1] = op;
+}
+
+void AddIntOperand(Model& model, int32_t value)
+{
+ DataLocation location = {};
+ location.offset = model.operandValues.size();
+ location.length = sizeof(int32_t);
+
+ Operand op = {};
+ op.type = OperandType::INT32;
+ op.dimensions = hidl_vec<uint32_t>{};
+ op.lifetime = OperandLifeTime::CONSTANT_COPY;
+ op.location = location;
+
+ model.operandValues.resize(model.operandValues.size() + location.length);
+ *reinterpret_cast<int32_t*>(&model.operandValues[location.offset]) = value;
+
+ AddOperand(model, op);
+}
+
+template<typename T>
+OperandType TypeToOperandType();
+
+template<>
+OperandType TypeToOperandType<float>()
+{
+ return OperandType::TENSOR_FLOAT32;
+};
+
+template<>
+OperandType TypeToOperandType<int32_t>()
+{
+ return OperandType::TENSOR_INT32;
+};
+
+
+
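+// Adds a constant tensor operand (CONSTANT_COPY lifetime) to the model, copying the given values
+// into model.operandValues.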
+template<typename T>
+void AddTensorOperand(Model& model, hidl_vec<uint32_t> dimensions, T* values)
+{
+ uint32_t totalElements = 1;
+ for (uint32_t dim : dimensions)
+ {
+ totalElements *= dim;
+ }
+
+ DataLocation location = {};
+ location.offset = model.operandValues.size();
+ location.length = totalElements * sizeof(T);
+
+ Operand op = {};
+ op.type = TypeToOperandType<T>();
+ op.dimensions = dimensions;
+ op.lifetime = OperandLifeTime::CONSTANT_COPY;
+ op.location = location;
+
+ model.operandValues.resize(model.operandValues.size() + location.length);
+ for (uint32_t i = 0; i < totalElements; i++)
+ {
+ *(reinterpret_cast<T*>(&model.operandValues[location.offset]) + i) = values[i];
+ }
+
+ AddOperand(model, op);
+}
+
+void AddInputOperand(Model& model, hidl_vec<uint32_t> dimensions)
+{
+ Operand op = {};
+ op.type = OperandType::TENSOR_FLOAT32;
+ op.dimensions = dimensions;
+ op.lifetime = OperandLifeTime::MODEL_INPUT;
+
+ AddOperand(model, op);
+
+ model.inputIndexes.resize(model.inputIndexes.size() + 1);
+ model.inputIndexes[model.inputIndexes.size() - 1] = model.operands.size() - 1;
+}
+
+void AddOutputOperand(Model& model, hidl_vec<uint32_t> dimensions)
+{
+ Operand op = {};
+ op.type = OperandType::TENSOR_FLOAT32;
+ op.dimensions = dimensions;
+ op.lifetime = OperandLifeTime::MODEL_OUTPUT;
+
+ AddOperand(model, op);
+
+ model.outputIndexes.resize(model.outputIndexes.size() + 1);
+ model.outputIndexes[model.outputIndexes.size() - 1] = model.operands.size() - 1;
+}
+
+android::sp<IPreparedModel> PrepareModel(const Model& model, ArmnnDriver& driver)
+{
+
+ sp<PreparedModelCallback> cb(new PreparedModelCallback());
+ driver.prepareModel(model, cb);
+
+ BOOST_TEST((cb->GetErrorStatus() == ErrorStatus::NONE));
+ BOOST_TEST((cb->GetPreparedModel() != nullptr));
+
+ return cb->GetPreparedModel();
+}
+
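+// Submits the request for execution and blocks until the execution callback is invoked.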
+void Execute(android::sp<IPreparedModel> preparedModel, const Request& request)
+{
+ sp<ExecutionCallback> cb(new ExecutionCallback());
+ BOOST_TEST(preparedModel->execute(request, cb) == ErrorStatus::NONE);
+ ALOGI("Execute: waiting for callback to be invoked");
+ cb->wait();
+}
+
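+// Submits the request for execution and returns the callback without waiting, so callers can overlap executions.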
+sp<ExecutionCallback> ExecuteNoWait(android::sp<IPreparedModel> preparedModel, const Request& request)
+{
+ sp<ExecutionCallback> cb(new ExecutionCallback());
+ BOOST_TEST(preparedModel->execute(request, cb) == ErrorStatus::NONE);
+ ALOGI("ExecuteNoWait: returning callback object");
+ return cb;
+}
+}
+
+// Add our own test here since we fail the FC tests supplied by Google (because of non-const weights)
+BOOST_AUTO_TEST_CASE(FullyConnected)
+{
+    // This should ideally replicate fully_connected_float.model.cpp,
+    // but that uses slightly awkward dimensions which we don't think we need to support for now.
+
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+ Model model = {};
+
+ // add operands
+ int32_t actValue = 0;
+ float weightValue[] = {2, 4, 1};
+ float biasValue[] = {4};
+
+ AddInputOperand(model, hidl_vec<uint32_t>{1, 3});
+ AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue);
+ AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
+ AddIntOperand(model, actValue);
+ AddOutputOperand(model, hidl_vec<uint32_t>{1, 1});
+
+ // make the fully connected operation
+ model.operations.resize(1);
+ model.operations[0].type = OperationType::FULLY_CONNECTED;
+ model.operations[0].inputs = hidl_vec<uint32_t>{0, 1, 2, 3};
+ model.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+ // make the prepared model
+ android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
+
+ // construct the request
+ DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 3 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{};
+
+ DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = 1 * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{};
+
+ Request request = {};
+ request.inputs = hidl_vec<RequestArgument>{input};
+ request.outputs = hidl_vec<RequestArgument>{output};
+
+ // set the input data (matching source test)
+ float indata[] = {2, 32, 16};
+ AddPoolAndSetData(3, request, indata);
+
+ // add memory for the output
+ android::sp<IMemory> outMemory = AddPoolAndGetData(1, request);
+ float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+ // run the execution
+ Execute(preparedModel, request);
+
+ // check the result
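+    // expected output = 2*2 + 4*32 + 1*16 + bias(4) = 152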
+ BOOST_TEST(outdata[0] == 152);
+}
+
+// Add our own test for concurrent execution
+// The main point of this test is to check that multiple requests can be
+// executed without waiting for the callback from the previous execution.
+// The operations performed are not significant.
+BOOST_AUTO_TEST_CASE(ConcurrentExecute)
+{
+ ALOGI("ConcurrentExecute: entry");
+
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+ Model model = {};
+
+ // add operands
+ int32_t actValue = 0;
+ float weightValue[] = {2, 4, 1};
+ float biasValue[] = {4};
+
+ AddInputOperand(model, hidl_vec<uint32_t>{1, 3});
+ AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue);
+ AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
+ AddIntOperand(model, actValue);
+ AddOutputOperand(model, hidl_vec<uint32_t>{1, 1});
+
+ // make the fully connected operation
+ model.operations.resize(1);
+ model.operations[0].type = OperationType::FULLY_CONNECTED;
+ model.operations[0].inputs = hidl_vec<uint32_t>{0, 1, 2, 3};
+ model.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+ // make the prepared models
+ const size_t maxRequests = 5;
+ android::sp<IPreparedModel> preparedModels[maxRequests];
+ for (size_t i = 0; i < maxRequests; ++i)
+ {
+ preparedModels[i] = PrepareModel(model, *driver);
+ }
+
+ // construct the request data
+ DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 3 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{};
+
+ DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = 1 * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{};
+
+ // build the requests
+ Request requests[maxRequests];
+ android::sp<IMemory> outMemory[maxRequests];
+ float* outdata[maxRequests];
+ for (size_t i = 0; i < maxRequests; ++i)
+ {
+ requests[i].inputs = hidl_vec<RequestArgument>{input};
+ requests[i].outputs = hidl_vec<RequestArgument>{output};
+ // set the input data (matching source test)
+ float indata[] = {2, 32, 16};
+ AddPoolAndSetData(3, requests[i], indata);
+ // add memory for the output
+ outMemory[i] = AddPoolAndGetData(1, requests[i]);
+ outdata[i] = static_cast<float*>(static_cast<void*>(outMemory[i]->getPointer()));
+ }
+
+ // invoke the execution of the requests
+ ALOGI("ConcurrentExecute: executing requests");
+ sp<ExecutionCallback> cb[maxRequests];
+ for (size_t i = 0; i < maxRequests; ++i)
+ {
+ cb[i] = ExecuteNoWait(preparedModels[i], requests[i]);
+ }
+
+ // wait for the requests to complete
+ ALOGI("ConcurrentExecute: waiting for callbacks");
+ for (size_t i = 0; i < maxRequests; ++i)
+ {
+ cb[i]->wait();
+ }
+
+ // check the results
+ ALOGI("ConcurrentExecute: validating results");
+ for (size_t i = 0; i < maxRequests; ++i)
+ {
+ BOOST_TEST(outdata[i][0] == 152);
+ }
+ ALOGI("ConcurrentExecute: exit");
+}
+
+BOOST_AUTO_TEST_CASE(GetSupportedOperations)
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+ ErrorStatus error;
+ std::vector<bool> sup;
+
+ ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
+ {
+ error = status;
+ sup = supported;
+ };
+
+ Model model1 = {};
+
+ // add operands
+ int32_t actValue = 0;
+ float weightValue[] = {2, 4, 1};
+ float biasValue[] = {4};
+
+ AddInputOperand(model1, hidl_vec<uint32_t>{1, 3});
+ AddTensorOperand(model1, hidl_vec<uint32_t>{1, 3}, weightValue);
+ AddTensorOperand(model1, hidl_vec<uint32_t>{1}, biasValue);
+ AddIntOperand(model1, actValue);
+ AddOutputOperand(model1, hidl_vec<uint32_t>{1, 1});
+
+ // make a correct fully connected operation
+ model1.operations.resize(2);
+ model1.operations[0].type = OperationType::FULLY_CONNECTED;
+ model1.operations[0].inputs = hidl_vec<uint32_t>{0, 1, 2, 3};
+ model1.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+    // make an incorrect fully connected operation (only one input instead of the four required),
+    // which should be reported as unsupported
+ AddIntOperand(model1, actValue);
+ AddOutputOperand(model1, hidl_vec<uint32_t>{1, 1});
+ model1.operations[1].type = OperationType::FULLY_CONNECTED;
+ model1.operations[1].inputs = hidl_vec<uint32_t>{4};
+ model1.operations[1].outputs = hidl_vec<uint32_t>{5};
+
+ driver->getSupportedOperations(model1, cb);
+ BOOST_TEST((int)error == (int)ErrorStatus::NONE);
+ BOOST_TEST(sup[0] == true);
+ BOOST_TEST(sup[1] == false);
+
+ // Broadcast add/mul are not supported
+ Model model2 = {};
+
+ AddInputOperand(model2, hidl_vec<uint32_t>{1, 1, 3, 4});
+ AddInputOperand(model2, hidl_vec<uint32_t>{4});
+ AddOutputOperand(model2, hidl_vec<uint32_t>{1, 1, 3, 4});
+ AddOutputOperand(model2, hidl_vec<uint32_t>{1, 1, 3, 4});
+
+ model2.operations.resize(2);
+
+ model2.operations[0].type = OperationType::ADD;
+ model2.operations[0].inputs = hidl_vec<uint32_t>{0,1};
+ model2.operations[0].outputs = hidl_vec<uint32_t>{2};
+
+ model2.operations[1].type = OperationType::MUL;
+ model2.operations[1].inputs = hidl_vec<uint32_t>{0,1};
+ model2.operations[1].outputs = hidl_vec<uint32_t>{3};
+
+ driver->getSupportedOperations(model2, cb);
+ BOOST_TEST((int)error == (int)ErrorStatus::NONE);
+ BOOST_TEST(sup[0] == false);
+ BOOST_TEST(sup[1] == false);
+
+ Model model3 = {};
+
+    // Add an operation we don't support (DEPTH_TO_SPACE); getSupportedOperations() should succeed but report it as unsupported
+ AddInputOperand(model3, hidl_vec<uint32_t>{1, 1, 1, 8});
+ AddIntOperand(model3, 2);
+ AddOutputOperand(model3, hidl_vec<uint32_t>{1, 2, 2, 2});
+ model3.operations.resize(1);
+ model3.operations[0].type = OperationType::DEPTH_TO_SPACE;
+    model3.operations[0].inputs = hidl_vec<uint32_t>{0, 1};
+ model3.operations[0].outputs = hidl_vec<uint32_t>{2};
+
+ driver->getSupportedOperations(model3, cb);
+ BOOST_TEST((int)error == (int)ErrorStatus::NONE);
+ BOOST_TEST(sup[0] == false);
+
+ // Add invalid operation
+ Model model4 = {};
+ AddIntOperand(model4, 0);
+ model4.operations.resize(1);
+ model4.operations[0].type = static_cast<OperationType>(100);
+ model4.operations[0].outputs = hidl_vec<uint32_t>{0};
+
+ driver->getSupportedOperations(model4, cb);
+ BOOST_TEST((int)error == (int)ErrorStatus::INVALID_ARGUMENT);
+}
+
+// The purpose of this test is to ensure that an unsupported operation is reported as such and that
+// getSupportedOperations() continues with the remaining operations (rather than failing and stopping).
+// As per IVGCVSW-710.
+BOOST_AUTO_TEST_CASE(UnsupportedLayerContinueOnFailure)
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+ ErrorStatus error;
+ std::vector<bool> sup;
+
+ ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
+ {
+ error = status;
+ sup = supported;
+ };
+
+ Model model = {};
+
+ // operands
+ int32_t actValue = 0;
+ float weightValue[] = {2, 4, 1};
+ float biasValue[] = {4};
+
+ // broadcast add is unsupported at the time of writing this test, but any unsupported layer will do
+ AddInputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4});
+ AddInputOperand(model, hidl_vec<uint32_t>{4});
+ AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4});
+
+ // fully connected
+ AddInputOperand(model, hidl_vec<uint32_t>{1, 3});
+ AddTensorOperand(model, hidl_vec<uint32_t>{1, 3}, weightValue);
+ AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
+ AddIntOperand(model, actValue);
+ AddOutputOperand(model, hidl_vec<uint32_t>{1, 1});
+
+ // broadcast mul is unsupported
+ AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, 3, 4});
+
+ model.operations.resize(3);
+
+ // unsupported
+ model.operations[0].type = OperationType::ADD;
+ model.operations[0].inputs = hidl_vec<uint32_t>{0,1};
+ model.operations[0].outputs = hidl_vec<uint32_t>{2};
+
+ // supported
+ model.operations[1].type = OperationType::FULLY_CONNECTED;
+ model.operations[1].inputs = hidl_vec<uint32_t>{3, 4, 5, 6};
+ model.operations[1].outputs = hidl_vec<uint32_t>{7};
+
+ // unsupported
+ model.operations[2].type = OperationType::MUL;
+ model.operations[2].inputs = hidl_vec<uint32_t>{0,1};
+ model.operations[2].outputs = hidl_vec<uint32_t>{8};
+
+    // we are testing that the unsupported layers are reported as false and that
+    // getSupportedOperations() continues rather than failing and stopping.
+ driver->getSupportedOperations(model, cb);
+ BOOST_TEST((int)error == (int)ErrorStatus::NONE);
+ BOOST_TEST(sup[0] == false);
+ BOOST_TEST(sup[1] == true);
+ BOOST_TEST(sup[2] == false);
+}
+
+// The purpose of this test is to ensure that when a failure is encountered during memory pool mapping
+// we properly report an error to the framework via a callback
+BOOST_AUTO_TEST_CASE(ModelToINetworkConverterMemPoolFail)
+{
+    auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+ ErrorStatus error;
+ std::vector<bool> sup;
+
+ ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
+ {
+ error = status;
+ sup = supported;
+ };
+
+ Model model = {};
+
+    model.pools = hidl_vec<hidl_memory>{hidl_memory("Unsupported hidl memory type", nullptr, 0)};
+
+    // memory pool mapping should fail, so we should report an error
+ driver->getSupportedOperations(model, cb);
+ BOOST_TEST((int)error == (int)ErrorStatus::GENERAL_FAILURE);
+}
+
+namespace
+{
+
+void PaddingTestImpl(android::nn::PaddingScheme paddingScheme)
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+ Model model = {};
+
+ uint32_t outSize = paddingScheme == kPaddingSame ? 2 : 1;
+
+ // add operands
+ float weightValue[] = {1, -1, 0, 1};
+ float biasValue[] = {0};
+
+ AddInputOperand(model, hidl_vec<uint32_t>{1, 2, 3, 1});
+ AddTensorOperand(model, hidl_vec<uint32_t>{1, 2, 2, 1}, weightValue);
+ AddTensorOperand(model, hidl_vec<uint32_t>{1}, biasValue);
+ AddIntOperand(model, (int32_t)paddingScheme); // padding
+ AddIntOperand(model, 2); // stride x
+ AddIntOperand(model, 2); // stride y
+ AddIntOperand(model, 0); // no activation
+ AddOutputOperand(model, hidl_vec<uint32_t>{1, 1, outSize, 1});
+
+ // make the convolution operation
+ model.operations.resize(1);
+ model.operations[0].type = OperationType::CONV_2D;
+ model.operations[0].inputs = hidl_vec<uint32_t>{0, 1, 2, 3, 4, 5, 6};
+ model.operations[0].outputs = hidl_vec<uint32_t>{7};
+
+ // make the prepared model
+ android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
+
+ // construct the request
+ DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 6 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{};
+
+ DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = outSize * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{};
+
+ Request request = {};
+ request.inputs = hidl_vec<RequestArgument>{input};
+ request.outputs = hidl_vec<RequestArgument>{output};
+
+
+ // set the input data (matching source test)
+ float indata[] = {4, 1, 0, 3, -1, 2};
+ AddPoolAndSetData(6, request, indata);
+
+ // add memory for the output
+ android::sp<IMemory> outMemory = AddPoolAndGetData(outSize, request);
+ float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+ // run the execution
+ Execute(preparedModel, request);
+
+ // check the result
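+    // valid padding gives a single output window: 4*1 + 1*(-1) + 3*0 + (-1)*1 = 2
+    // same padding adds a second window over the last input column {0, 2} and zero padding on the right: 0*1 + 0*(-1) + 2*0 + 0*1 = 0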
+ if (paddingScheme == kPaddingValid)
+ {
+ BOOST_TEST(outdata[0] == 2);
+ }
+ else if (paddingScheme == kPaddingSame)
+ {
+ BOOST_TEST(outdata[0] == 2);
+ BOOST_TEST(outdata[1] == 0);
+ }
+ else
+ {
+ BOOST_TEST(false);
+ }
+}
+
+}
+
+BOOST_AUTO_TEST_CASE(ConvValidPadding)
+{
+ PaddingTestImpl(kPaddingValid);
+}
+
+BOOST_AUTO_TEST_CASE(ConvSamePadding)
+{
+ PaddingTestImpl(kPaddingSame);
+}
+
+BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput)
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+ ErrorStatus error;
+ std::vector<bool> sup;
+
+ ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
+ {
+ error = status;
+ sup = supported;
+ };
+
+ Model model = {};
+
+ // operands
+ int32_t actValue = 0;
+ float weightValue[] = {1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1}; //identity
+ float biasValue[] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+ // fully connected operation
+ AddInputOperand(model, hidl_vec<uint32_t>{1, 1, 1, 8});
+ AddTensorOperand(model, hidl_vec<uint32_t>{8, 8}, weightValue);
+ AddTensorOperand(model, hidl_vec<uint32_t>{8}, biasValue);
+ AddIntOperand(model, actValue);
+ AddOutputOperand(model, hidl_vec<uint32_t>{1, 8});
+
+ model.operations.resize(1);
+
+ model.operations[0].type = OperationType::FULLY_CONNECTED;
+ model.operations[0].inputs = hidl_vec<uint32_t>{0,1,2,3};
+ model.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+ // make the prepared model
+ android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
+
+
+ // construct the request
+ DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 8 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{};
+
+ DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = 8 * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{};
+
+ Request request = {};
+ request.inputs = hidl_vec<RequestArgument>{input};
+ request.outputs = hidl_vec<RequestArgument>{output};
+
+ // set the input data
+ float indata[] = {1,2,3,4,5,6,7,8};
+ AddPoolAndSetData(8, request, indata);
+
+ // add memory for the output
+ android::sp<IMemory> outMemory = AddPoolAndGetData(8, request);
+ float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+ // run the execution
+ Execute(preparedModel, request);
+
+ // check the result
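+    // identity weights and zero bias mean the output should equal the input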
+ BOOST_TEST(outdata[0] == 1);
+ BOOST_TEST(outdata[1] == 2);
+ BOOST_TEST(outdata[2] == 3);
+ BOOST_TEST(outdata[3] == 4);
+ BOOST_TEST(outdata[4] == 5);
+ BOOST_TEST(outdata[5] == 6);
+ BOOST_TEST(outdata[6] == 7);
+ BOOST_TEST(outdata[7] == 8);
+}
+
+BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape)
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+ ErrorStatus error;
+ std::vector<bool> sup;
+
+ ArmnnDriver::getSupportedOperations_cb cb = [&](ErrorStatus status, const std::vector<bool>& supported)
+ {
+ error = status;
+ sup = supported;
+ };
+
+ Model model = {};
+
+ // operands
+ int32_t actValue = 0;
+ float weightValue[] = {1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1}; //identity
+ float biasValue[] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+ // fully connected operation
+ AddInputOperand(model, hidl_vec<uint32_t>{1, 2, 2, 2});
+ AddTensorOperand(model, hidl_vec<uint32_t>{8, 8}, weightValue);
+ AddTensorOperand(model, hidl_vec<uint32_t>{8}, biasValue);
+ AddIntOperand(model, actValue);
+ AddOutputOperand(model, hidl_vec<uint32_t>{1, 8});
+
+ model.operations.resize(1);
+
+ model.operations[0].type = OperationType::FULLY_CONNECTED;
+ model.operations[0].inputs = hidl_vec<uint32_t>{0,1,2,3};
+ model.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+ // make the prepared model
+ android::sp<IPreparedModel> preparedModel = PrepareModel(model, *driver);
+
+
+ // construct the request
+ DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 8 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{};
+
+ DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = 8 * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{};
+
+ Request request = {};
+ request.inputs = hidl_vec<RequestArgument>{input};
+ request.outputs = hidl_vec<RequestArgument>{output};
+
+ // set the input data
+ float indata[] = {1,2,3,4,5,6,7,8};
+ AddPoolAndSetData(8, request, indata);
+
+ // add memory for the output
+ android::sp<IMemory> outMemory = AddPoolAndGetData(8, request);
+ float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+ // run the execution
+ Execute(preparedModel, request);
+
+ // check the result
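+    // the 1x2x2x2 input should be flattened to 8 elements for the fully connected layer;
+    // with identity weights and zero bias the output should equal the input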
+ BOOST_TEST(outdata[0] == 1);
+ BOOST_TEST(outdata[1] == 2);
+ BOOST_TEST(outdata[2] == 3);
+ BOOST_TEST(outdata[3] == 4);
+ BOOST_TEST(outdata[4] == 5);
+ BOOST_TEST(outdata[5] == 6);
+ BOOST_TEST(outdata[6] == 7);
+ BOOST_TEST(outdata[7] == 8);
+}
+
+BOOST_AUTO_TEST_SUITE_END()