From 10b4dfd8e9ccd7a03df7bb053ee1c644cb37f8ab Mon Sep 17 00:00:00 2001 From: David Beck Date: Wed, 19 Sep 2018 12:03:20 +0100 Subject: IVGCVSW-1897 : build infrastructure for the src/backends folder Change-Id: I7ebafb675ccc77ad54d1deb01412a8379a5356bb --- src/backends/ArmComputeTensorUtils.cpp | 135 + src/backends/ArmComputeTensorUtils.hpp | 199 + src/backends/ArmComputeUtils.hpp | 125 + src/backends/CMakeLists.txt | 74 + src/backends/ClContextControl.cpp | 235 + src/backends/ClContextControl.hpp | 62 + src/backends/ClLayerSupport.cpp | 468 ++ src/backends/ClLayerSupport.hpp | 158 + src/backends/ClTensorHandle.hpp | 136 + src/backends/ClWorkloadFactory.cpp | 490 ++ src/backends/ClWorkloadFactory.hpp | 136 + src/backends/ClWorkloads.hpp | 40 + src/backends/ClWorkloads/CMakeLists.txt | 89 + .../ClWorkloads/ClActivationFloatWorkload.cpp | 56 + .../ClWorkloads/ClActivationFloatWorkload.hpp | 29 + .../ClWorkloads/ClActivationUint8Workload.cpp | 44 + .../ClWorkloads/ClActivationUint8Workload.hpp | 29 + src/backends/ClWorkloads/ClAdditionWorkload.cpp | 66 + src/backends/ClWorkloads/ClAdditionWorkload.hpp | 31 + .../ClWorkloads/ClBaseConstantWorkload.cpp | 64 + .../ClWorkloads/ClBaseConstantWorkload.hpp | 30 + src/backends/ClWorkloads/ClBaseMergerWorkload.hpp | 28 + .../ClWorkloads/ClBaseSplitterWorkload.hpp | 28 + .../ClBatchNormalizationFloatWorkload.cpp | 96 + .../ClBatchNormalizationFloatWorkload.hpp | 46 + .../ClWorkloads/ClConstantFloatWorkload.cpp | 18 + .../ClWorkloads/ClConstantFloatWorkload.hpp | 20 + .../ClWorkloads/ClConstantUint8Workload.cpp | 18 + .../ClWorkloads/ClConstantUint8Workload.hpp | 20 + .../ClWorkloads/ClConvertFp16ToFp32Workload.cpp | 66 + .../ClWorkloads/ClConvertFp16ToFp32Workload.hpp | 30 + .../ClWorkloads/ClConvertFp32ToFp16Workload.cpp | 66 + .../ClWorkloads/ClConvertFp32ToFp16Workload.hpp | 30 + .../ClWorkloads/ClConvolution2dBaseWorkload.cpp | 48 + .../ClWorkloads/ClConvolution2dBaseWorkload.hpp | 24 + .../ClWorkloads/ClConvolution2dFloatWorkload.cpp | 81 + .../ClWorkloads/ClConvolution2dFloatWorkload.hpp | 35 + .../ClWorkloads/ClConvolution2dUint8Workload.cpp | 81 + .../ClWorkloads/ClConvolution2dUint8Workload.hpp | 35 + .../ClDepthwiseConvolutionBaseWorkload.cpp | 125 + .../ClDepthwiseConvolutionBaseWorkload.hpp | 40 + .../ClDepthwiseConvolutionFloatWorkload.cpp | 39 + .../ClDepthwiseConvolutionFloatWorkload.hpp | 26 + .../ClDepthwiseConvolutionUint8Workload.cpp | 40 + .../ClDepthwiseConvolutionUint8Workload.hpp | 23 + .../ClWorkloads/ClDivisionFloatWorkload.cpp | 48 + .../ClWorkloads/ClDivisionFloatWorkload.hpp | 32 + src/backends/ClWorkloads/ClFloorFloatWorkload.cpp | 31 + src/backends/ClWorkloads/ClFloorFloatWorkload.hpp | 30 + .../ClWorkloads/ClFullyConnectedWorkload.cpp | 111 + .../ClWorkloads/ClFullyConnectedWorkload.hpp | 43 + .../ClWorkloads/ClL2NormalizationFloatWorkload.cpp | 49 + .../ClWorkloads/ClL2NormalizationFloatWorkload.hpp | 34 + src/backends/ClWorkloads/ClLstmFloatWorkload.cpp | 408 ++ src/backends/ClWorkloads/ClLstmFloatWorkload.hpp | 68 + src/backends/ClWorkloads/ClMergerFloatWorkload.cpp | 20 + src/backends/ClWorkloads/ClMergerFloatWorkload.hpp | 22 + src/backends/ClWorkloads/ClMergerUint8Workload.cpp | 19 + src/backends/ClWorkloads/ClMergerUint8Workload.hpp | 21 + .../ClWorkloads/ClMultiplicationFloatWorkload.cpp | 60 + .../ClWorkloads/ClMultiplicationFloatWorkload.hpp | 34 + .../ClWorkloads/ClNormalizationFloatWorkload.cpp | 50 + .../ClWorkloads/ClNormalizationFloatWorkload.hpp | 29 + src/backends/ClWorkloads/ClPermuteWorkload.cpp | 56 + 
src/backends/ClWorkloads/ClPermuteWorkload.hpp | 42 + .../ClWorkloads/ClPooling2dBaseWorkload.cpp | 47 + .../ClWorkloads/ClPooling2dBaseWorkload.hpp | 33 + .../ClWorkloads/ClPooling2dFloatWorkload.cpp | 26 + .../ClWorkloads/ClPooling2dFloatWorkload.hpp | 22 + .../ClWorkloads/ClPooling2dUint8Workload.cpp | 27 + .../ClWorkloads/ClPooling2dUint8Workload.hpp | 25 + .../ClWorkloads/ClReshapeFloatWorkload.cpp | 33 + .../ClWorkloads/ClReshapeFloatWorkload.hpp | 28 + .../ClWorkloads/ClReshapeUint8Workload.cpp | 31 + .../ClWorkloads/ClReshapeUint8Workload.hpp | 29 + .../ClWorkloads/ClResizeBilinearFloatWorkload.cpp | 38 + .../ClWorkloads/ClResizeBilinearFloatWorkload.hpp | 25 + src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp | 30 + src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp | 17 + .../ClWorkloads/ClSoftmaxFloatWorkload.cpp | 33 + .../ClWorkloads/ClSoftmaxFloatWorkload.hpp | 30 + .../ClWorkloads/ClSoftmaxUint8Workload.cpp | 43 + .../ClWorkloads/ClSoftmaxUint8Workload.hpp | 31 + .../ClWorkloads/ClSplitterFloatWorkload.cpp | 19 + .../ClWorkloads/ClSplitterFloatWorkload.hpp | 20 + .../ClWorkloads/ClSplitterUint8Workload.cpp | 19 + .../ClWorkloads/ClSplitterUint8Workload.hpp | 21 + src/backends/ClWorkloads/ClSubtractionWorkload.cpp | 66 + src/backends/ClWorkloads/ClSubtractionWorkload.hpp | 31 + src/backends/ClWorkloads/ClWorkloadUtils.hpp | 62 + src/backends/ClWorkloads/backend.cmake | 9 + src/backends/CpuTensorHandle.cpp | 113 + src/backends/CpuTensorHandle.hpp | 171 + src/backends/CpuTensorHandleFwd.hpp | 16 + src/backends/ITensorHandle.hpp | 73 + src/backends/MakeWorkloadHelper.hpp | 78 + src/backends/MemCopyWorkload.cpp | 61 + src/backends/MemCopyWorkload.hpp | 26 + src/backends/NeonLayerSupport.cpp | 468 ++ src/backends/NeonLayerSupport.hpp | 163 + src/backends/NeonTensorHandle.hpp | 137 + src/backends/NeonWorkloadFactory.cpp | 479 ++ src/backends/NeonWorkloadFactory.hpp | 135 + src/backends/NeonWorkloadUtils.cpp | 60 + src/backends/NeonWorkloadUtils.hpp | 34 + src/backends/NeonWorkloads.hpp | 41 + src/backends/NeonWorkloads/CMakeLists.txt | 83 + .../NeonWorkloads/NeonActivationFloatWorkload.cpp | 57 + .../NeonWorkloads/NeonActivationFloatWorkload.hpp | 29 + .../NeonWorkloads/NeonActivationUint8Workload.cpp | 35 + .../NeonWorkloads/NeonActivationUint8Workload.hpp | 28 + .../NeonWorkloads/NeonAdditionFloatWorkload.cpp | 48 + .../NeonWorkloads/NeonAdditionFloatWorkload.hpp | 30 + .../NeonWorkloads/NeonBaseConstantWorkload.hpp | 83 + .../NeonWorkloads/NeonBaseMergerWorkload.hpp | 26 + .../NeonWorkloads/NeonBaseSplitterWorkload.hpp | 27 + .../NeonBatchNormalizationFloatWorkload.cpp | 96 + .../NeonBatchNormalizationFloatWorkload.hpp | 42 + .../NeonWorkloads/NeonConstantFloatWorkload.cpp | 17 + .../NeonWorkloads/NeonConstantFloatWorkload.hpp | 20 + .../NeonWorkloads/NeonConstantUint8Workload.cpp | 17 + .../NeonWorkloads/NeonConstantUint8Workload.hpp | 20 + .../NeonConvertFp16ToFp32Workload.cpp | 41 + .../NeonConvertFp16ToFp32Workload.hpp | 26 + .../NeonConvertFp32ToFp16Workload.cpp | 43 + .../NeonConvertFp32ToFp16Workload.hpp | 26 + .../NeonConvolution2dBaseWorkload.cpp | 146 + .../NeonConvolution2dBaseWorkload.hpp | 49 + .../NeonConvolution2dFloatWorkload.cpp | 40 + .../NeonConvolution2dFloatWorkload.hpp | 29 + .../NeonConvolution2dUint8Workload.cpp | 35 + .../NeonConvolution2dUint8Workload.hpp | 29 + .../NeonDepthwiseConvolutionBaseWorkload.cpp | 49 + .../NeonDepthwiseConvolutionBaseWorkload.hpp | 21 + .../NeonDepthwiseConvolutionFloatWorkload.cpp | 94 + 
.../NeonDepthwiseConvolutionFloatWorkload.hpp | 33 + .../NeonDepthwiseConvolutionUint8Workload.cpp | 94 + .../NeonDepthwiseConvolutionUint8Workload.hpp | 29 + .../NeonWorkloads/NeonFloorFloatWorkload.cpp | 30 + .../NeonWorkloads/NeonFloorFloatWorkload.hpp | 27 + .../NeonFullyConnectedFloatWorkload.cpp | 96 + .../NeonFullyConnectedFloatWorkload.hpp | 40 + .../NeonL2NormalizationFloatWorkload.cpp | 42 + .../NeonL2NormalizationFloatWorkload.hpp | 33 + .../NeonWorkloads/NeonLstmFloatWorkload.cpp | 22 + .../NeonWorkloads/NeonLstmFloatWorkload.hpp | 20 + .../NeonWorkloads/NeonMergerFloatWorkload.cpp | 17 + .../NeonWorkloads/NeonMergerFloatWorkload.hpp | 20 + .../NeonWorkloads/NeonMergerUint8Workload.cpp | 17 + .../NeonWorkloads/NeonMergerUint8Workload.hpp | 20 + .../NeonMultiplicationFloatWorkload.cpp | 60 + .../NeonMultiplicationFloatWorkload.hpp | 30 + .../NeonNormalizationFloatWorkload.cpp | 70 + .../NeonNormalizationFloatWorkload.hpp | 34 + src/backends/NeonWorkloads/NeonPermuteWorkload.cpp | 54 + src/backends/NeonWorkloads/NeonPermuteWorkload.hpp | 43 + .../NeonWorkloads/NeonPooling2dBaseWorkload.cpp | 47 + .../NeonWorkloads/NeonPooling2dBaseWorkload.hpp | 37 + .../NeonWorkloads/NeonPooling2dFloatWorkload.cpp | 27 + .../NeonWorkloads/NeonPooling2dFloatWorkload.hpp | 25 + .../NeonWorkloads/NeonPooling2dUint8Workload.cpp | 26 + .../NeonWorkloads/NeonPooling2dUint8Workload.hpp | 25 + .../NeonWorkloads/NeonReshapeFloatWorkload.cpp | 32 + .../NeonWorkloads/NeonReshapeFloatWorkload.hpp | 29 + .../NeonWorkloads/NeonReshapeUint8Workload.cpp | 30 + .../NeonWorkloads/NeonReshapeUint8Workload.hpp | 27 + .../NeonWorkloads/NeonSoftmaxBaseWorkload.cpp | 30 + .../NeonWorkloads/NeonSoftmaxBaseWorkload.hpp | 17 + .../NeonWorkloads/NeonSoftmaxFloatWorkload.cpp | 32 + .../NeonWorkloads/NeonSoftmaxFloatWorkload.hpp | 29 + .../NeonWorkloads/NeonSoftmaxUint8Workload.cpp | 41 + .../NeonWorkloads/NeonSoftmaxUint8Workload.hpp | 27 + .../NeonWorkloads/NeonSplitterFloatWorkload.cpp | 17 + .../NeonWorkloads/NeonSplitterFloatWorkload.hpp | 20 + .../NeonWorkloads/NeonSplitterUint8Workload.cpp | 17 + .../NeonWorkloads/NeonSplitterUint8Workload.hpp | 20 + .../NeonWorkloads/NeonSubtractionFloatWorkload.cpp | 46 + .../NeonWorkloads/NeonSubtractionFloatWorkload.hpp | 27 + src/backends/NeonWorkloads/backend.cmake | 9 + src/backends/OutputHandler.cpp | 33 + src/backends/OutputHandler.hpp | 63 + src/backends/RefLayerSupport.cpp | 398 ++ src/backends/RefLayerSupport.hpp | 155 + src/backends/RefWorkloadFactory.cpp | 249 + src/backends/RefWorkloadFactory.hpp | 145 + src/backends/RefWorkloads.hpp | 53 + src/backends/RefWorkloads/Activation.cpp | 91 + src/backends/RefWorkloads/Activation.hpp | 20 + src/backends/RefWorkloads/ArithmeticFunction.cpp | 29 + src/backends/RefWorkloads/ArithmeticFunction.hpp | 24 + src/backends/RefWorkloads/BatchNormImpl.hpp | 56 + src/backends/RefWorkloads/Broadcast.cpp | 33 + src/backends/RefWorkloads/Broadcast.hpp | 58 + src/backends/RefWorkloads/CMakeLists.txt | 101 + src/backends/RefWorkloads/ConvImpl.cpp | 71 + src/backends/RefWorkloads/ConvImpl.hpp | 187 + src/backends/RefWorkloads/FullyConnected.cpp | 62 + src/backends/RefWorkloads/FullyConnected.hpp | 22 + src/backends/RefWorkloads/Merger.hpp | 82 + src/backends/RefWorkloads/Pooling2d.cpp | 241 + src/backends/RefWorkloads/Pooling2d.hpp | 21 + .../RefWorkloads/RefActivationFloat32Workload.cpp | 28 + .../RefWorkloads/RefActivationFloat32Workload.hpp | 20 + .../RefWorkloads/RefActivationUint8Workload.cpp | 38 + 
.../RefWorkloads/RefActivationUint8Workload.hpp | 21 + .../RefWorkloads/RefArithmeticWorkload.cpp | 69 + .../RefWorkloads/RefArithmeticWorkload.hpp | 122 + .../RefWorkloads/RefBaseConstantWorkload.cpp | 49 + .../RefWorkloads/RefBaseConstantWorkload.hpp | 33 + .../RefBatchNormalizationFloat32Workload.cpp | 38 + .../RefBatchNormalizationFloat32Workload.hpp | 28 + .../RefBatchNormalizationUint8Workload.cpp | 47 + .../RefBatchNormalizationUint8Workload.hpp | 28 + .../RefWorkloads/RefConstantFloat32Workload.cpp | 19 + .../RefWorkloads/RefConstantFloat32Workload.hpp | 20 + .../RefWorkloads/RefConstantUint8Workload.cpp | 19 + .../RefWorkloads/RefConstantUint8Workload.hpp | 20 + .../RefWorkloads/RefConvertFp16ToFp32Workload.cpp | 25 + .../RefWorkloads/RefConvertFp16ToFp32Workload.hpp | 21 + .../RefWorkloads/RefConvertFp32ToFp16Workload.cpp | 29 + .../RefWorkloads/RefConvertFp32ToFp16Workload.hpp | 21 + .../RefConvolution2dFloat32Workload.cpp | 37 + .../RefConvolution2dFloat32Workload.hpp | 27 + .../RefWorkloads/RefConvolution2dUint8Workload.cpp | 45 + .../RefWorkloads/RefConvolution2dUint8Workload.hpp | 28 + .../RefDepthwiseConvolution2dFloat32Workload.cpp | 37 + .../RefDepthwiseConvolution2dFloat32Workload.hpp | 27 + .../RefDepthwiseConvolution2dUint8Workload.cpp | 46 + .../RefDepthwiseConvolution2dUint8Workload.hpp | 26 + .../RefFakeQuantizationFloat32Workload.cpp | 42 + .../RefFakeQuantizationFloat32Workload.hpp | 21 + .../RefWorkloads/RefFloorFloat32Workload.cpp | 29 + .../RefWorkloads/RefFloorFloat32Workload.hpp | 21 + .../RefFullyConnectedFloat32Workload.cpp | 43 + .../RefFullyConnectedFloat32Workload.hpp | 26 + .../RefFullyConnectedUint8Workload.cpp | 66 + .../RefFullyConnectedUint8Workload.hpp | 26 + .../RefL2NormalizationFloat32Workload.cpp | 61 + .../RefL2NormalizationFloat32Workload.hpp | 21 + .../RefWorkloads/RefLstmFloat32Workload.cpp | 16 + .../RefWorkloads/RefLstmFloat32Workload.hpp | 21 + .../RefWorkloads/RefMergerFloat32Workload.cpp | 21 + .../RefWorkloads/RefMergerFloat32Workload.hpp | 21 + .../RefWorkloads/RefMergerUint8Workload.cpp | 21 + .../RefWorkloads/RefMergerUint8Workload.hpp | 21 + .../RefNormalizationFloat32Workload.cpp | 185 + .../RefNormalizationFloat32Workload.hpp | 21 + src/backends/RefWorkloads/RefPermuteWorkload.cpp | 32 + src/backends/RefWorkloads/RefPermuteWorkload.hpp | 33 + .../RefWorkloads/RefPooling2dFloat32Workload.cpp | 33 + .../RefWorkloads/RefPooling2dFloat32Workload.hpp | 21 + .../RefWorkloads/RefPooling2dUint8Workload.cpp | 37 + .../RefWorkloads/RefPooling2dUint8Workload.hpp | 21 + .../RefWorkloads/RefReshapeFloat32Workload.cpp | 27 + .../RefWorkloads/RefReshapeFloat32Workload.hpp | 21 + .../RefWorkloads/RefReshapeUint8Workload.cpp | 27 + .../RefWorkloads/RefReshapeUint8Workload.hpp | 21 + .../RefResizeBilinearFloat32Workload.cpp | 29 + .../RefResizeBilinearFloat32Workload.hpp | 21 + .../RefResizeBilinearUint8Workload.cpp | 33 + .../RefResizeBilinearUint8Workload.hpp | 21 + .../RefWorkloads/RefSoftmaxFloat32Workload.cpp | 26 + .../RefWorkloads/RefSoftmaxFloat32Workload.hpp | 21 + .../RefWorkloads/RefSoftmaxUint8Workload.cpp | 36 + .../RefWorkloads/RefSoftmaxUint8Workload.hpp | 21 + .../RefWorkloads/RefSplitterFloat32Workload.cpp | 21 + .../RefWorkloads/RefSplitterFloat32Workload.hpp | 21 + .../RefWorkloads/RefSplitterUint8Workload.cpp | 21 + .../RefWorkloads/RefSplitterUint8Workload.hpp | 21 + src/backends/RefWorkloads/RefWorkloadUtils.hpp | 138 + src/backends/RefWorkloads/ResizeBilinear.cpp | 92 + src/backends/RefWorkloads/ResizeBilinear.hpp | 15 + 
src/backends/RefWorkloads/Softmax.cpp | 49 + src/backends/RefWorkloads/Softmax.hpp | 16 + src/backends/RefWorkloads/Splitter.hpp | 84 + .../RefWorkloads/TensorBufferArrayView.hpp | 42 + src/backends/RefWorkloads/backend.cmake | 7 + src/backends/StringMapping.cpp | 17 + src/backends/StringMapping.hpp | 49 + src/backends/Workload.hpp | 147 + src/backends/WorkloadData.cpp | 871 ++++ src/backends/WorkloadData.hpp | 330 ++ src/backends/WorkloadDataCollector.hpp | 36 + src/backends/WorkloadDataFwd.hpp | 27 + src/backends/WorkloadFactory.cpp | 571 +++ src/backends/WorkloadFactory.hpp | 137 + src/backends/WorkloadInfo.hpp | 18 + src/backends/WorkloadUtils.hpp | 139 + src/backends/backends.cmake | 15 + src/backends/test/ActivationFixture.hpp | 56 + src/backends/test/ActivationTestImpl.hpp | 560 +++ src/backends/test/ArmComputeCl.cpp | 311 ++ src/backends/test/ArmComputeNeon.cpp | 463 ++ src/backends/test/BatchNormTestImpl.hpp | 112 + src/backends/test/ClContextControlFixture.hpp | 21 + src/backends/test/Conv2dTestImpl.hpp | 921 ++++ src/backends/test/ConvertFp16ToFp32TestImpl.hpp | 55 + src/backends/test/ConvertFp32ToFp16TestImpl.hpp | 55 + src/backends/test/CreateWorkloadCl.cpp | 564 +++ src/backends/test/CreateWorkloadNeon.cpp | 455 ++ src/backends/test/CreateWorkloadRef.cpp | 478 ++ src/backends/test/FullyConnectedTestImpl.hpp | 287 ++ src/backends/test/IsLayerSupportedTest.cpp | 239 + src/backends/test/IsLayerSupportedTestImpl.hpp | 565 +++ src/backends/test/LayerReleaseConstantDataTest.cpp | 212 + src/backends/test/LayerTests.cpp | 4750 ++++++++++++++++++++ src/backends/test/LayerTests.hpp | 345 ++ src/backends/test/LstmTestImpl.hpp | 1150 +++++ src/backends/test/MemCopyTests.cpp | 180 + src/backends/test/NormTestImpl.hpp | 241 + src/backends/test/PermuteTestImpl.hpp | 225 + src/backends/test/Pooling2dTestImpl.hpp | 1116 +++++ src/backends/test/QuantizeHelper.hpp | 91 + src/backends/test/Reference.cpp | 253 ++ src/backends/test/ReshapeTestImpl.hpp | 177 + src/backends/test/SoftmaxTestImpl.hpp | 153 + src/backends/test/SplitterTestImpl.hpp | 307 ++ src/backends/test/TensorCopyUtils.cpp | 159 + src/backends/test/TensorCopyUtils.hpp | 14 + src/backends/test/WorkloadDataValidation.cpp | 471 ++ src/backends/test/WorkloadTestUtils.hpp | 55 + 321 files changed, 32821 insertions(+) create mode 100644 src/backends/ArmComputeTensorUtils.cpp create mode 100644 src/backends/ArmComputeTensorUtils.hpp create mode 100644 src/backends/ArmComputeUtils.hpp create mode 100644 src/backends/CMakeLists.txt create mode 100644 src/backends/ClContextControl.cpp create mode 100644 src/backends/ClContextControl.hpp create mode 100644 src/backends/ClLayerSupport.cpp create mode 100644 src/backends/ClLayerSupport.hpp create mode 100644 src/backends/ClTensorHandle.hpp create mode 100644 src/backends/ClWorkloadFactory.cpp create mode 100644 src/backends/ClWorkloadFactory.hpp create mode 100644 src/backends/ClWorkloads.hpp create mode 100644 src/backends/ClWorkloads/CMakeLists.txt create mode 100644 src/backends/ClWorkloads/ClActivationFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClActivationFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClActivationUint8Workload.cpp create mode 100644 src/backends/ClWorkloads/ClActivationUint8Workload.hpp create mode 100644 src/backends/ClWorkloads/ClAdditionWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClAdditionWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClBaseConstantWorkload.cpp create mode 100644 
src/backends/ClWorkloads/ClBaseConstantWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClBaseMergerWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClConstantFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClConstantFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClConstantUint8Workload.cpp create mode 100644 src/backends/ClWorkloads/ClConstantUint8Workload.hpp create mode 100644 src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp create mode 100644 src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp create mode 100644 src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp create mode 100644 src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp create mode 100644 src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp create mode 100644 src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp create mode 100644 src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClFloorFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClFloorFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClLstmFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClLstmFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClMergerFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClMergerFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClMergerUint8Workload.cpp create mode 100644 src/backends/ClWorkloads/ClMergerUint8Workload.hpp create mode 100644 src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClPermuteWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClPermuteWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp create mode 100644 
src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp create mode 100644 src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp create mode 100644 src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClReshapeUint8Workload.cpp create mode 100644 src/backends/ClWorkloads/ClReshapeUint8Workload.hpp create mode 100644 src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp create mode 100644 src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp create mode 100644 src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClSplitterUint8Workload.cpp create mode 100644 src/backends/ClWorkloads/ClSplitterUint8Workload.hpp create mode 100644 src/backends/ClWorkloads/ClSubtractionWorkload.cpp create mode 100644 src/backends/ClWorkloads/ClSubtractionWorkload.hpp create mode 100644 src/backends/ClWorkloads/ClWorkloadUtils.hpp create mode 100644 src/backends/ClWorkloads/backend.cmake create mode 100644 src/backends/CpuTensorHandle.cpp create mode 100644 src/backends/CpuTensorHandle.hpp create mode 100644 src/backends/CpuTensorHandleFwd.hpp create mode 100644 src/backends/ITensorHandle.hpp create mode 100644 src/backends/MakeWorkloadHelper.hpp create mode 100644 src/backends/MemCopyWorkload.cpp create mode 100644 src/backends/MemCopyWorkload.hpp create mode 100644 src/backends/NeonLayerSupport.cpp create mode 100644 src/backends/NeonLayerSupport.hpp create mode 100644 src/backends/NeonTensorHandle.hpp create mode 100644 src/backends/NeonWorkloadFactory.cpp create mode 100644 src/backends/NeonWorkloadFactory.hpp create mode 100644 src/backends/NeonWorkloadUtils.cpp create mode 100644 src/backends/NeonWorkloadUtils.hpp create mode 100644 src/backends/NeonWorkloads.hpp create mode 100644 src/backends/NeonWorkloads/CMakeLists.txt create mode 100644 src/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonActivationUint8Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonActivationUint8Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp create mode 100644 
src/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonConstantUint8Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonConstantUint8Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonMergerUint8Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonMergerUint8Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonPermuteWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonPermuteWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp create mode 100644 
src/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp create mode 100644 src/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp create mode 100644 src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp create mode 100644 src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp create mode 100644 src/backends/NeonWorkloads/backend.cmake create mode 100644 src/backends/OutputHandler.cpp create mode 100644 src/backends/OutputHandler.hpp create mode 100644 src/backends/RefLayerSupport.cpp create mode 100644 src/backends/RefLayerSupport.hpp create mode 100644 src/backends/RefWorkloadFactory.cpp create mode 100644 src/backends/RefWorkloadFactory.hpp create mode 100644 src/backends/RefWorkloads.hpp create mode 100644 src/backends/RefWorkloads/Activation.cpp create mode 100644 src/backends/RefWorkloads/Activation.hpp create mode 100644 src/backends/RefWorkloads/ArithmeticFunction.cpp create mode 100644 src/backends/RefWorkloads/ArithmeticFunction.hpp create mode 100644 src/backends/RefWorkloads/BatchNormImpl.hpp create mode 100644 src/backends/RefWorkloads/Broadcast.cpp create mode 100644 src/backends/RefWorkloads/Broadcast.hpp create mode 100644 src/backends/RefWorkloads/CMakeLists.txt create mode 100644 src/backends/RefWorkloads/ConvImpl.cpp create mode 100644 src/backends/RefWorkloads/ConvImpl.hpp create mode 100644 src/backends/RefWorkloads/FullyConnected.cpp create mode 100644 src/backends/RefWorkloads/FullyConnected.hpp create mode 100644 src/backends/RefWorkloads/Merger.hpp create mode 100644 src/backends/RefWorkloads/Pooling2d.cpp create mode 100644 src/backends/RefWorkloads/Pooling2d.hpp create mode 100644 src/backends/RefWorkloads/RefActivationFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefActivationFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefActivationUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefActivationUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefArithmeticWorkload.cpp create mode 100644 src/backends/RefWorkloads/RefArithmeticWorkload.hpp create mode 100644 src/backends/RefWorkloads/RefBaseConstantWorkload.cpp create mode 100644 src/backends/RefWorkloads/RefBaseConstantWorkload.hpp create mode 100644 src/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefConstantFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefConstantFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefConstantUint8Workload.cpp create mode 
100644 src/backends/RefWorkloads/RefConstantUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp create mode 100644 src/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp create mode 100644 src/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefFloorFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefFloorFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefLstmFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefLstmFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefMergerFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefMergerFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefMergerUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefMergerUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefPermuteWorkload.cpp create mode 100644 src/backends/RefWorkloads/RefPermuteWorkload.hpp create mode 100644 src/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefPooling2dUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefPooling2dUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefReshapeFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefReshapeFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefReshapeUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefReshapeUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp create mode 100644 
src/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefSplitterFloat32Workload.cpp create mode 100644 src/backends/RefWorkloads/RefSplitterFloat32Workload.hpp create mode 100644 src/backends/RefWorkloads/RefSplitterUint8Workload.cpp create mode 100644 src/backends/RefWorkloads/RefSplitterUint8Workload.hpp create mode 100644 src/backends/RefWorkloads/RefWorkloadUtils.hpp create mode 100644 src/backends/RefWorkloads/ResizeBilinear.cpp create mode 100644 src/backends/RefWorkloads/ResizeBilinear.hpp create mode 100644 src/backends/RefWorkloads/Softmax.cpp create mode 100644 src/backends/RefWorkloads/Softmax.hpp create mode 100644 src/backends/RefWorkloads/Splitter.hpp create mode 100644 src/backends/RefWorkloads/TensorBufferArrayView.hpp create mode 100644 src/backends/RefWorkloads/backend.cmake create mode 100644 src/backends/StringMapping.cpp create mode 100644 src/backends/StringMapping.hpp create mode 100644 src/backends/Workload.hpp create mode 100644 src/backends/WorkloadData.cpp create mode 100644 src/backends/WorkloadData.hpp create mode 100644 src/backends/WorkloadDataCollector.hpp create mode 100644 src/backends/WorkloadDataFwd.hpp create mode 100644 src/backends/WorkloadFactory.cpp create mode 100644 src/backends/WorkloadFactory.hpp create mode 100644 src/backends/WorkloadInfo.hpp create mode 100644 src/backends/WorkloadUtils.hpp create mode 100644 src/backends/backends.cmake create mode 100644 src/backends/test/ActivationFixture.hpp create mode 100644 src/backends/test/ActivationTestImpl.hpp create mode 100644 src/backends/test/ArmComputeCl.cpp create mode 100644 src/backends/test/ArmComputeNeon.cpp create mode 100644 src/backends/test/BatchNormTestImpl.hpp create mode 100644 src/backends/test/ClContextControlFixture.hpp create mode 100644 src/backends/test/Conv2dTestImpl.hpp create mode 100644 src/backends/test/ConvertFp16ToFp32TestImpl.hpp create mode 100644 src/backends/test/ConvertFp32ToFp16TestImpl.hpp create mode 100644 src/backends/test/CreateWorkloadCl.cpp create mode 100644 src/backends/test/CreateWorkloadNeon.cpp create mode 100644 src/backends/test/CreateWorkloadRef.cpp create mode 100644 src/backends/test/FullyConnectedTestImpl.hpp create mode 100644 src/backends/test/IsLayerSupportedTest.cpp create mode 100644 src/backends/test/IsLayerSupportedTestImpl.hpp create mode 100644 src/backends/test/LayerReleaseConstantDataTest.cpp create mode 100644 src/backends/test/LayerTests.cpp create mode 100644 src/backends/test/LayerTests.hpp create mode 100644 src/backends/test/LstmTestImpl.hpp create mode 100644 src/backends/test/MemCopyTests.cpp create mode 100644 src/backends/test/NormTestImpl.hpp create mode 100644 src/backends/test/PermuteTestImpl.hpp create mode 100644 src/backends/test/Pooling2dTestImpl.hpp create mode 100644 src/backends/test/QuantizeHelper.hpp create mode 100644 src/backends/test/Reference.cpp create mode 100644 src/backends/test/ReshapeTestImpl.hpp create mode 100644 src/backends/test/SoftmaxTestImpl.hpp create mode 100644 src/backends/test/SplitterTestImpl.hpp create mode 100644 src/backends/test/TensorCopyUtils.cpp create mode 100644 src/backends/test/TensorCopyUtils.hpp create mode 100644 src/backends/test/WorkloadDataValidation.cpp create mode 100644 
src/backends/test/WorkloadTestUtils.hpp

(limited to 'src/backends')

diff --git a/src/backends/ArmComputeTensorUtils.cpp b/src/backends/ArmComputeTensorUtils.cpp
new file mode 100644
index 0000000000..ba9fb40cfc
--- /dev/null
+++ b/src/backends/ArmComputeTensorUtils.cpp
@@ -0,0 +1,135 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "ArmComputeTensorUtils.hpp"
+#include "ArmComputeUtils.hpp"
+
+#include
+
+namespace armnn
+{
+namespace armcomputetensorutils
+{
+
+arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType)
+{
+    switch(dataType)
+    {
+        case armnn::DataType::Float16:
+            return arm_compute::DataType::F16;
+        case armnn::DataType::Float32:
+            return arm_compute::DataType::F32;
+        case armnn::DataType::QuantisedAsymm8:
+            return arm_compute::DataType::QASYMM8;
+        case armnn::DataType::Signed32:
+            return arm_compute::DataType::S32;
+        default:
+            BOOST_ASSERT_MSG(false, "Unknown data type");
+            return arm_compute::DataType::UNKNOWN;
+    }
+}
+
+arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape)
+{
+    arm_compute::TensorShape shape;
+
+    // armnn tensors are (batch, channels, height, width).
+    // arm_compute tensors are (width, height, channels, batch).
+    for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); i++)
+    {
+        // Note that our dimensions are stored in the opposite order to ACL's.
+        shape.set(tensorShape.GetNumDimensions() - i - 1, tensorShape[i]);
+
+        // TensorShape::set() flattens leading ones, so that batch size 1 cannot happen.
+        // arm_compute tensors expect this.
+    }
+
+    // prevent arm_compute issue where tensor is flattened to nothing
+    if (shape.num_dimensions() == 0)
+    {
+        shape.set_num_dimensions(1);
+    }
+
+    return shape;
+}
+
+// Utility function used to build a TensorInfo object, that can be used to initialise
+// ARM Compute Tensor and CLTensor allocators.
+arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo)
+{
+    const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape());
+    const arm_compute::DataType aclDataType = GetArmComputeDataType(tensorInfo.GetDataType());
+    const arm_compute::QuantizationInfo aclQuantizationInfo(tensorInfo.GetQuantizationScale(),
+                                                            tensorInfo.GetQuantizationOffset());
+
+    return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
+}
+
+arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor)
+{
+    using arm_compute::PoolingType;
+    using arm_compute::DimensionRoundingType;
+    using arm_compute::PadStrideInfo;
+    using arm_compute::PoolingLayerInfo;
+    using arm_compute::Size2D;
+
+    // Resolve ARM Compute layer parameters.
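The dimension reversal in BuildArmComputeTensorShape above can be illustrated without any armnn or ACL types. The following standalone sketch (hypothetical shape values, plain std::array in place of the real TensorShape classes) shows how an armnn (N, C, H, W) shape of {1, 3, 224, 224} becomes the (W, H, C, N) ordering that arm_compute expects:

#include <array>
#include <cstddef>
#include <iostream>

int main()
{
    const std::array<unsigned int, 4> armnnShape = {1, 3, 224, 224}; // N, C, H, W
    std::array<unsigned int, 4> aclShape{};

    // Same index arithmetic as BuildArmComputeTensorShape: dimension i goes to
    // position (numDimensions - i - 1).
    for (std::size_t i = 0; i < armnnShape.size(); ++i)
    {
        aclShape[armnnShape.size() - 1 - i] = armnnShape[i];
    }

    for (unsigned int d : aclShape)
    {
        std::cout << d << ' '; // prints: 224 224 3 1 (W, H, C, N)
    }
    std::cout << '\n';
    return 0;
}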
+ const PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType); + + bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0); + //use specific constructor if global pooling + if(isGlobalPooling) + { + return arm_compute::PoolingLayerInfo(poolingType); + } + + const DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType( + descriptor.m_OutputShapeRounding); + const PadStrideInfo padStrideInfo(descriptor.m_StrideX, + descriptor.m_StrideY, + descriptor.m_PadLeft, + descriptor.m_PadRight, + descriptor.m_PadTop, + descriptor.m_PadBottom, + rounding); + + const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude); + + const Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight); + + return arm_compute::PoolingLayerInfo(poolingType, poolSize, padStrideInfo, excludePadding); +} + +arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor) +{ + const arm_compute::NormType normType = + ConvertNormalizationAlgorithmChannelToAclNormType(descriptor.m_NormChannelType); + return arm_compute::NormalizationLayerInfo(normType, + descriptor.m_NormSize, + descriptor.m_Alpha, + descriptor.m_Beta, + descriptor.m_K, + false); +} + +arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& perm) +{ + arm_compute::PermutationVector aclPerm; + + unsigned int start = 0; + while ((start < perm.GetSize()) && (start == perm[start])) + { + ++start; + } + + for (unsigned int i = start; i < perm.GetSize(); ++i) + { + aclPerm.set(i - start, perm[i] - start); + } + + return aclPerm; +} + +} // namespace armcomputetensorutils +} // namespace armnn diff --git a/src/backends/ArmComputeTensorUtils.hpp b/src/backends/ArmComputeTensorUtils.hpp new file mode 100644 index 0000000000..572e310ecf --- /dev/null +++ b/src/backends/ArmComputeTensorUtils.hpp @@ -0,0 +1,199 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include + +#include +#include +#include + +#include + +namespace armnn +{ +class ITensorHandle; + +namespace armcomputetensorutils +{ + +/// Utility function to map an armnn::DataType to corresponding arm_compute::DataType. +arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType); + +/// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape. +arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape); + +/// Utility function used to setup an arm_compute::ITensorInfo object whose dimensions are based on the given +/// armnn::ITensorInfo. +arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo); + +/// Utility function used to setup an arm_compute::PoolingLayerInfo object from an armnn::Pooling2dDescriptor. +arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor); + +/// Utility function to setup an arm_compute::NormalizationLayerInfo object from an armnn::NormalizationDescriptor. +arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc); + +/// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector. 
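BuildArmComputePermutationVector above strips any leading dimensions that map to themselves and re-bases the remaining indices before handing the permutation to ACL. A standalone sketch of that trimming logic, with plain std::vector standing in for the armnn and ACL permutation types (the example permutation is hypothetical):

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // armnn permutation {0, 1, 3, 2}: dimensions 0 and 1 stay put, 2 and 3 swap.
    const std::vector<unsigned int> armnnPerm = {0, 1, 3, 2};

    // Skip the leading identity mappings, as the while loop above does.
    std::size_t start = 0;
    while (start < armnnPerm.size() && start == armnnPerm[start])
    {
        ++start;
    }

    // Re-base the rest so ACL only sees the dimensions that actually move.
    std::vector<unsigned int> aclPerm;
    for (std::size_t i = start; i < armnnPerm.size(); ++i)
    {
        aclPerm.push_back(armnnPerm[i] - static_cast<unsigned int>(start));
    }

    for (unsigned int v : aclPerm)
    {
        std::cout << v << ' '; // prints: 1 0
    }
    std::cout << '\n';
    return 0;
}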
+arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector); + +/// Utility function used to setup an arm_compute::PadStrideInfo object from an armnn layer descriptor. +template +arm_compute::PadStrideInfo BuildArmComputePadStrideInfo(const Descriptor &descriptor) +{ + return arm_compute::PadStrideInfo(descriptor.m_StrideX, + descriptor.m_StrideY, + descriptor.m_PadLeft, + descriptor.m_PadRight, + descriptor.m_PadTop, + descriptor.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); +} + +/// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor. +template +void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo) +{ + tensor.allocator()->init(BuildArmComputeTensorInfo(tensorInfo)); +} + +template +void InitialiseArmComputeTensorEmpty(Tensor& tensor) +{ + tensor.allocator()->allocate(); +} + +/// Utility function to free unused tensors after a workload is configured and prepared +template +void FreeTensorIfUnused(std::unique_ptr& tensor) +{ + if (tensor && !tensor->is_used()) + { + tensor.reset(nullptr); + } +} + +// Helper function to obtain byte offset into tensor data +inline size_t GetTensorOffset(const arm_compute::ITensorInfo& info, + uint32_t batchIndex, + uint32_t channelIndex, + uint32_t y, + uint32_t x) +{ + arm_compute::Coordinates coords; + coords.set(3, static_cast(batchIndex)); + coords.set(2, static_cast(channelIndex)); + coords.set(1, static_cast(y)); + coords.set(0, static_cast(x)); + return info.offset_element_in_bytes(coords); +} + +// Helper function to obtain element offset into data buffer representing tensor data (assuming no strides). +inline size_t GetLinearBufferOffset(const arm_compute::ITensorInfo& info, + uint32_t batchIndex, + uint32_t channelIndex, + uint32_t y, + uint32_t x) +{ + const arm_compute::TensorShape& shape = info.tensor_shape(); + uint32_t width = static_cast(shape[0]); + uint32_t height = static_cast(shape[1]); + uint32_t numChannels = static_cast(shape[2]); + return ((batchIndex * numChannels + channelIndex) * height + y) * width + x; +} + +template +void CopyArmComputeITensorData(const arm_compute::ITensor& srcTensor, T* dstData) +{ + // If MaxNumOfTensorDimensions is increased, this loop will need fixing. + static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData"); + { + const arm_compute::ITensorInfo& info = *srcTensor.info(); + const arm_compute::TensorShape& shape = info.tensor_shape(); + const uint8_t* const bufferPtr = srcTensor.buffer(); + uint32_t width = static_cast(shape[0]); + uint32_t height = static_cast(shape[1]); + uint32_t numChannels = static_cast(shape[2]); + uint32_t numBatches = static_cast(shape[3]); + + for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex) + { + for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex) + { + for (unsigned int y = 0; y < height; ++y) + { + // Copies one row from arm_compute tensor buffer to linear memory buffer. + // A row is the largest contiguous region we can copy, as the tensor data may be using strides. + memcpy(dstData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0), + bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0), + width * sizeof(T)); + } + } + } + } +} + +template +void CopyArmComputeITensorData(const T* srcData, arm_compute::ITensor& dstTensor) +{ + // If MaxNumOfTensorDimensions is increased, this loop will need fixing. 
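GetLinearBufferOffset above assumes a dense NCHW layout, so the element offset reduces to simple index arithmetic. A quick standalone check of that formula with hypothetical dimensions:

#include <cstdint>
#include <iostream>

int main()
{
    // Shape 1x2x4x4 (N, C, H, W); element (batch 0, channel 1, y 2, x 3).
    const uint32_t numChannels = 2, height = 4, width = 4;
    const uint32_t batchIndex = 0, channelIndex = 1, y = 2, x = 3;

    // Same formula as GetLinearBufferOffset: ((b * C + c) * H + y) * W + x.
    const uint32_t offset = ((batchIndex * numChannels + channelIndex) * height + y) * width + x;

    std::cout << offset << '\n'; // prints 27, i.e. the 28th element of the buffer
    return 0;
}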
+ static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyArmComputeITensorData"); + { + const arm_compute::ITensorInfo& info = *dstTensor.info(); + const arm_compute::TensorShape& shape = info.tensor_shape(); + uint8_t* const bufferPtr = dstTensor.buffer(); + uint32_t width = static_cast(shape[0]); + uint32_t height = static_cast(shape[1]); + uint32_t numChannels = static_cast(shape[2]); + uint32_t numBatches = static_cast(shape[3]); + + for (unsigned int batchIndex = 0; batchIndex < numBatches; ++batchIndex) + { + for (unsigned int channelIndex = 0; channelIndex < numChannels; ++channelIndex) + { + for (unsigned int y = 0; y < height; ++y) + { + // Copies one row from linear memory buffer to arm_compute tensor buffer. + // A row is the largest contiguous region we can copy, as the tensor data may be using strides. + memcpy(bufferPtr + GetTensorOffset(info, batchIndex, channelIndex, y, 0), + srcData + GetLinearBufferOffset(info, batchIndex, channelIndex, y, 0), + width * sizeof(T)); + } + } + } + } +} + +/// Construct a TensorShape object from an ArmCompute object based on arm_compute::Dimensions. +/// \tparam ArmComputeType Any type that implements the Dimensions interface +/// \tparam T Shape value type +/// \param shapelike An ArmCompute object that implements the Dimensions interface +/// \param initial A default value to initialise the shape with +/// \return A TensorShape object filled from the Acl shapelike object. +template +TensorShape GetTensorShape(const ArmComputeType& shapelike, T initial) +{ + std::vector s(MaxNumOfTensorDimensions, initial); + for (unsigned int i=0; i < shapelike.num_dimensions(); ++i) + { + s[(shapelike.num_dimensions()-1)-i] = boost::numeric_cast(shapelike[i]); + } + return TensorShape(boost::numeric_cast(shapelike.num_dimensions()), s.data()); +}; + +/// Get the strides from an ACL strides object +inline TensorShape GetStrides(const arm_compute::Strides& strides) +{ + return GetTensorShape(strides, 0U); +} + +/// Get the shape from an ACL shape object +inline TensorShape GetShape(const arm_compute::TensorShape& shape) +{ + return GetTensorShape(shape, 1U); +} + +} // namespace armcomputetensorutils +} // namespace armnn diff --git a/src/backends/ArmComputeUtils.hpp b/src/backends/ArmComputeUtils.hpp new file mode 100644 index 0000000000..db472964ea --- /dev/null +++ b/src/backends/ArmComputeUtils.hpp @@ -0,0 +1,125 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#if ARMCOMPUTENEON_ENABLED || ARMCOMPUTECL_ENABLED + +#include +#include + +#include + +namespace armnn +{ + +inline arm_compute::NormalizationLayerInfo +CreateAclNormalizationLayerInfoForL2Normalization(const armnn::TensorInfo& tensorInfo) +{ + const unsigned int depth = tensorInfo.GetShape()[1]; + + // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. This version of + // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose + // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully chosing the normalization + // parameters. + // + // Please refer to both the reference implementation of the normalization layer and the implementation of + // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below. + + // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd. 
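As a usage illustration for the tensor helpers above, the sketch below builds an ACL CPU tensor from an armnn::TensorInfo, allocates it, and round-trips a host buffer through the two CopyArmComputeITensorData overloads. This is a sketch only, assuming a build with ARMCOMPUTENEON or ARMCOMPUTECL enabled and the ACL runtime headers on the include path; the function name RoundTripExample and the chosen shape are illustrative:

#include "ArmComputeTensorUtils.hpp"

#include <arm_compute/runtime/Tensor.h>

#include <vector>

void RoundTripExample()
{
    const unsigned int dims[] = {1, 2, 4, 4}; // armnn order: N, C, H, W
    const armnn::TensorInfo info(4, dims, armnn::DataType::Float32);

    // Initialise the ACL tensor metadata from the armnn description, then
    // allocate its backing memory.
    arm_compute::Tensor tensor;
    armnn::armcomputetensorutils::BuildArmComputeTensor(tensor, info);
    armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(tensor);

    std::vector<float> input(info.GetNumElements(), 1.0f);
    std::vector<float> output(info.GetNumElements(), 0.0f);

    // Host buffer -> ACL tensor, copied row by row so padded strides are honoured...
    armnn::armcomputetensorutils::CopyArmComputeITensorData(input.data(), tensor);

    // ...and back into a linear host buffer.
    armnn::armcomputetensorutils::CopyArmComputeITensorData(tensor, output.data());
}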
+ // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in + // ACL's normalization_layer_cross_map() CL function. + const uint32_t normSize = depth * 2u + 1u; + + // See ACL's NormalizationLayerInfo::scale_coeff() definition. + // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead. + const float alpha = 1.0f; + + // Don't offset the reduction. + const float kappa = 0.0f; + + // pow(reduction, -0.5) = 1 / sqrt(reduction) + const float beta = 0.5f; + + return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false); +} + +inline arm_compute::ActivationLayerInfo::ActivationFunction +ConvertActivationFunctionToAclActivationFunction(ActivationFunction armnnFunction) +{ + using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction; + + switch (armnnFunction) + { + case ActivationFunction::Linear: return AclActivationFunction::LINEAR; + // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function. + case ActivationFunction::Sigmoid: return AclActivationFunction::LOGISTIC; + case ActivationFunction::ReLu: return AclActivationFunction::RELU; + case ActivationFunction::BoundedReLu: return AclActivationFunction::LU_BOUNDED_RELU; + case ActivationFunction::SoftReLu: return AclActivationFunction::SOFT_RELU; + case ActivationFunction::LeakyReLu: return AclActivationFunction::LEAKY_RELU; + case ActivationFunction::Abs: return AclActivationFunction::ABS; + case ActivationFunction::Sqrt: return AclActivationFunction::SQRT; + case ActivationFunction::Square: return AclActivationFunction::SQUARE; + case ActivationFunction::TanH: return AclActivationFunction::TANH; + default: throw InvalidArgumentException("Unsupported activation function"); + } +} + +inline arm_compute::ActivationLayerInfo +ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor& actDesc) +{ + return arm_compute::ActivationLayerInfo(ConvertActivationFunctionToAclActivationFunction(actDesc.m_Function), + actDesc.m_A, actDesc.m_B); +} + +inline arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType(PoolingAlgorithm poolingAlgorithm) +{ + using arm_compute::PoolingType; + + switch (poolingAlgorithm) + { + case PoolingAlgorithm::Max: return PoolingType::MAX; + case PoolingAlgorithm::Average: return PoolingType::AVG; + case PoolingAlgorithm::L2: return PoolingType::L2; + default: throw InvalidArgumentException("Unsupported pooling algorithm"); + } +} + +inline arm_compute::DimensionRoundingType ConvertOutputShapeRoundingToAclDimensionRoundingType(OutputShapeRounding + rounding) +{ + using arm_compute::DimensionRoundingType; + + switch (rounding) + { + case OutputShapeRounding::Ceiling: return DimensionRoundingType::CEIL; + case OutputShapeRounding::Floor: return DimensionRoundingType::FLOOR; + default: throw InvalidArgumentException("Unsupported Output Shape Rounding type"); + } +} + +inline arm_compute::NormType +ConvertNormalizationAlgorithmChannelToAclNormType(NormalizationAlgorithmChannel channelType) +{ + using arm_compute::NormType; + switch (channelType) + { + case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP; + case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D; + default: throw InvalidArgumentException("Unsupported normalization algorithm channel type"); + } +} + +inline arm_compute::FullyConnectedLayerInfo 
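The parameter choice above (normSize = 2 * depth + 1, alpha = 1, kappa = 0, beta = 0.5) can be sanity-checked numerically without ACL. Taking the comments above at face value, the cross-map normalization computes x / (kappa + alpha * sum(x_j^2))^beta over the whole depth range, which with these values collapses to plain L2 normalization x / ||x||. A small standalone check with a hypothetical two-channel value:

#include <cmath>
#include <iostream>
#include <vector>

int main()
{
    const std::vector<float> channelValues = {3.0f, 4.0f}; // one spatial position, depth 2
    const float kappa = 0.0f, alpha = 1.0f, beta = 0.5f;

    float sumSquares = 0.0f;
    for (float v : channelValues)
    {
        sumSquares += v * v;
    }

    const float denom = std::pow(kappa + alpha * sumSquares, beta); // sqrt(25) == 5

    for (float v : channelValues)
    {
        std::cout << v / denom << ' '; // prints: 0.6 0.8, the unit-L2-norm vector
    }
    std::cout << '\n';
    return 0;
}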
+ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor& fullyConnectedDesc) +{ + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix; + return fc_info; +} + +} + +#endif // ARMCOMPUTENEON_ENABLED || ARMCOMPUTECL_ENABLED diff --git a/src/backends/CMakeLists.txt b/src/backends/CMakeLists.txt new file mode 100644 index 0000000000..4a949c04f7 --- /dev/null +++ b/src/backends/CMakeLists.txt @@ -0,0 +1,74 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +list(APPEND armnnBackendsCommon_sources + ClContextControl.cpp + ClContextControl.hpp + ClLayerSupport.cpp + ClLayerSupport.hpp + ClWorkloadFactory.cpp + ClWorkloadFactory.hpp + ClWorkloads.hpp + CpuTensorHandle.cpp + CpuTensorHandleFwd.hpp + CpuTensorHandle.hpp + ITensorHandle.hpp + MakeWorkloadHelper.hpp + MemCopyWorkload.cpp + MemCopyWorkload.hpp + NeonLayerSupport.cpp + NeonLayerSupport.hpp + NeonWorkloadFactory.cpp + NeonWorkloadFactory.hpp + NeonWorkloads.hpp + OutputHandler.cpp + OutputHandler.hpp + RefLayerSupport.cpp + RefLayerSupport.hpp + RefWorkloadFactory.cpp + RefWorkloadFactory.hpp + RefWorkloads.hpp + StringMapping.cpp + StringMapping.hpp + WorkloadDataCollector.hpp + WorkloadData.cpp + WorkloadDataFwd.hpp + WorkloadData.hpp + WorkloadFactory.cpp + WorkloadFactory.hpp + Workload.hpp + WorkloadInfo.hpp + WorkloadUtils.hpp +) + +if(ARMCOMPUTENEON OR ARMCOMPUTECL) + # Files shared by all ARM Compute backends + list(APPEND armnnBackendsCommon_sources + ArmComputeTensorUtils.hpp + ArmComputeTensorUtils.cpp + ArmComputeUtils.hpp + ) +endif() + +if(ARMCOMPUTENEON) + # Additionally include source files for ARM Compute NEON backend + list(APPEND armnnBackendsCommon_sources + NeonWorkloadUtils.cpp + NeonWorkloadUtils.hpp + NeonTensorHandle.hpp + ) +endif() + +if(ARMCOMPUTECL) + # Additionally include source files for ARM Compute OpenCL backend + list(APPEND armnnBackendsCommon_sources + ClTensorHandle.hpp + ) +endif() + +add_library(armnnBackendsCommon STATIC ${armnnBackendsCommon_sources}) +target_include_directories(armnnBackendsCommon PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnBackendsCommon PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnBackendsCommon PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/ClContextControl.cpp b/src/backends/ClContextControl.cpp new file mode 100644 index 0000000000..e8b21c942d --- /dev/null +++ b/src/backends/ClContextControl.cpp @@ -0,0 +1,235 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClContextControl.hpp" + +#include "armnn/Exceptions.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include +#include +#endif + +#include +#include +#include +#include +#include + +#include "LeakChecking.hpp" + +namespace cl +{ +class Context; +class CommandQueue; +class Device; +} + +namespace armnn +{ + +ClContextControl::ClContextControl(IGpuAccTunedParameters* clTunedParameters, + bool profilingEnabled) + : m_clTunedParameters(boost::polymorphic_downcast(clTunedParameters)) + , m_ProfilingEnabled(profilingEnabled) +{ + // Ignore m_ProfilingEnabled if unused to avoid compiling problems when ArmCompute is disabled. + boost::ignore_unused(m_ProfilingEnabled); + +#ifdef ARMCOMPUTECL_ENABLED + try + { + std::vector platforms; + cl::Platform::get(&platforms); + + // Selects default platform for the first element. 
+ cl::Platform::setDefault(platforms[0]); + + std::vector devices; + platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); + + // Selects default device for the first element. + cl::Device::setDefault(devices[0]); + } + catch (const cl::Error& clError) + { + throw ClRuntimeUnavailableException(boost::str(boost::format( + "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" + ) % clError.what() % clError.err())); + } + + // Removes the use of global CL context. + cl::Context::setDefault(cl::Context{}); + BOOST_ASSERT(cl::Context::getDefault()() == NULL); + + // Removes the use of global CL command queue. + cl::CommandQueue::setDefault(cl::CommandQueue{}); + BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL); + + // Always load the OpenCL runtime. + LoadOpenClRuntime(); +#endif +} + +ClContextControl::~ClContextControl() +{ +#ifdef ARMCOMPUTECL_ENABLED + // Load the OpencCL runtime without the tuned parameters to free the memory for them. + try + { + UnloadOpenClRuntime(); + } + catch (const cl::Error& clError) + { + // This should not happen, it is ignored if it does. + + // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an + // exception of type std::length_error. + // Using stderr instead in this context as there is no point in nesting try-catch blocks here. + std::cerr << "A CL error occurred unloading the runtime tuner parameters: " + << clError.what() << ". CL error code is: " << clError.err() << std::endl; + } +#endif +} + +void ClContextControl::LoadOpenClRuntime() +{ + DoLoadOpenClRuntime(true); +} + +void ClContextControl::UnloadOpenClRuntime() +{ + DoLoadOpenClRuntime(false); +} + +void ClContextControl::DoLoadOpenClRuntime(bool useTunedParameters) +{ +#ifdef ARMCOMPUTECL_ENABLED + cl::Device device = cl::Device::getDefault(); + cl::Context context; + cl::CommandQueue commandQueue; + + if (arm_compute::CLScheduler::get().context()() != NULL) + { + // Wait for all queued CL requests to finish before reinitialising it. + arm_compute::CLScheduler::get().sync(); + } + + try + { + arm_compute::CLKernelLibrary::get().clear_programs_cache(); + // Initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are no + // context references); it is initialised again, with a proper context, later. + arm_compute::CLScheduler::get().init(context, commandQueue, device); + arm_compute::CLKernelLibrary::get().init(".", context, device); + + { + // + // Here we replace the context with a new one in which + // the memory leak checks show it as an extra allocation but + // because of the scope of the leak checks, it doesn't count + // the disposal of the original object. On the other hand it + // does count the creation of this context which it flags + // as a memory leak. By adding the following line we prevent + // this to happen. + // + ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE(); + context = cl::Context(device); + } + + // NOTE: In this specific case profiling has to be enabled on the command queue + // in order for the CLTuner to work. + bool profilingNeededForClTuner = useTunedParameters && m_clTunedParameters && + m_clTunedParameters->m_Mode == IGpuAccTunedParameters::Mode::UpdateTunedParameters; + + if (m_ProfilingEnabled || profilingNeededForClTuner) + { + // Create a new queue with profiling enabled. + commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); + } + else + { + // Use default queue. 
+ commandQueue = cl::CommandQueue(context, device); + } + } + catch (const cl::Error& clError) + { + throw ClRuntimeUnavailableException(boost::str(boost::format( + "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" + ) % clError.what() % clError.err())); + } + + // Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute. + arm_compute::CLKernelLibrary::get().init(".", context, device); + + arm_compute::ICLTuner* tuner = nullptr; + if (useTunedParameters && m_clTunedParameters) + { + tuner = &m_clTunedParameters->m_Tuner; + } + arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner); +#endif +} + +void ClContextControl::ClearClCache() +{ + DoLoadOpenClRuntime(true); +} + +armnn::IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(armnn::IGpuAccTunedParameters::Mode mode) +{ + return new ClTunedParameters(mode); +} + +armnn::IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode mode) +{ + return IGpuAccTunedParametersPtr(CreateRaw(mode), &IGpuAccTunedParameters::Destroy); +} + +void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params) +{ + delete params; +} + +ClTunedParameters::ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode) + : m_Mode(mode) +#ifdef ARMCOMPUTECL_ENABLED + , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) +#endif +{ +} + +void ClTunedParameters::Load(const char* filename) +{ +#ifdef ARMCOMPUTECL_ENABLED + try + { + m_Tuner.load_from_file(filename); + } + catch (const std::exception& e) + { + throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + + e.what()); + } +#endif +} + +void ClTunedParameters::Save(const char* filename) const +{ +#ifdef ARMCOMPUTECL_ENABLED + try + { + m_Tuner.save_to_file(filename); + } + catch (const std::exception& e) + { + throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + + e.what()); + } +#endif +} + +} // namespace armnn diff --git a/src/backends/ClContextControl.hpp b/src/backends/ClContextControl.hpp new file mode 100644 index 0000000000..5ac56423bd --- /dev/null +++ b/src/backends/ClContextControl.hpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "armnn/IRuntime.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include +#endif + +namespace armnn +{ + +class IGpuAccTunedParameters; +class ClTunedParameters; + +// ARM Compute OpenCL context control. +class ClContextControl +{ +public: + + ClContextControl(IGpuAccTunedParameters* clTunedParameters = nullptr, + bool profilingEnabled = false); + + virtual ~ClContextControl(); + + void LoadOpenClRuntime(); + + // Users should call this (after freeing all of the cl::Context objects they use) + // to release the cached memory used by the compute library. + void UnloadOpenClRuntime(); + + // Clear the CL cache, without losing the tuned parameter settings. 
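+    // A minimal usage sketch (illustrative only, using nothing beyond the
+    // interface declared in this header):
+    //
+    //     armnn::ClContextControl contextControl(nullptr /*no tuned params*/,
+    //                                            false   /*no profiling*/);
+    //     // ... build and run GpuAcc workloads ...
+    //     contextControl.ClearClCache();        // drop compiled kernels, keep tuner state
+    //     contextControl.UnloadOpenClRuntime(); // once all cl::Context users are released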
+ void ClearClCache(); + +private: + + void DoLoadOpenClRuntime(bool useTunedParameters); + + ClTunedParameters* m_clTunedParameters; + + bool m_ProfilingEnabled; +}; + +class ClTunedParameters : public IGpuAccTunedParameters +{ +public: + ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode); + + virtual void Load(const char* filename); + virtual void Save(const char* filename) const; + + Mode m_Mode; + +#ifdef ARMCOMPUTECL_ENABLED + arm_compute::CLTuner m_Tuner; +#endif +}; + +} // namespace armnn diff --git a/src/backends/ClLayerSupport.cpp b/src/backends/ClLayerSupport.cpp new file mode 100644 index 0000000000..30a1330706 --- /dev/null +++ b/src/backends/ClLayerSupport.cpp @@ -0,0 +1,468 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "LayerSupportCommon.hpp" + +#include "ClLayerSupport.hpp" +#include "InternalTypes.hpp" +#include +#include +#include + +#include + +#ifdef ARMCOMPUTECL_ENABLED +#include "ClWorkloads/ClAdditionWorkload.hpp" +#include "ClWorkloads/ClActivationFloatWorkload.hpp" +#include "ClWorkloads/ClBatchNormalizationFloatWorkload.hpp" +#include "ClWorkloads/ClConvertFp16ToFp32Workload.hpp" +#include "ClWorkloads/ClConvertFp32ToFp16Workload.hpp" +#include "ClWorkloads/ClConvolution2dBaseWorkload.hpp" +#include "ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp" +#include "ClWorkloads/ClDivisionFloatWorkload.hpp" +#include "ClWorkloads/ClL2NormalizationFloatWorkload.hpp" +#include "ClWorkloads/ClMultiplicationFloatWorkload.hpp" +#include "ClWorkloads/ClFullyConnectedWorkload.hpp" +#include "ClWorkloads/ClPooling2dBaseWorkload.hpp" +#include "ClWorkloads/ClPermuteWorkload.hpp" +#include "ClWorkloads/ClNormalizationFloatWorkload.hpp" +#include "ClWorkloads/ClSoftmaxBaseWorkload.hpp" +#include "ClWorkloads/ClSubtractionWorkload.hpp" +#include "ClWorkloads/ClLstmFloatWorkload.hpp" +#endif + +using namespace boost; + +namespace armnn +{ +namespace +{ +template +bool IsMatchingSize2d(const TensorInfo& weightInfo) +{ + // Width & Height must match. + return (weightInfo.GetShape()[3] == FilterSize) && (weightInfo.GetShape()[2] == FilterSize); +} + +template +bool IsMatchingStride(uint32_t actualStride) +{ + return ValidStride == actualStride; +} + +template +bool IsMatchingStride(uint32_t actualStride) +{ + return IsMatchingStride(actualStride) || IsMatchingStride(actualStride); +}; + +bool IsClBackendSupported(std::string* reasonIfUnsupported) +{ +#if ARMCOMPUTECL_ENABLED + return true; +#else + if (reasonIfUnsupported != nullptr) + { + *reasonIfUnsupported = "The armnn library has been built without CL support"; + } + return false; +#endif +} + +#if ARMCOMPUTECL_ENABLED +#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) (expr) +#else +#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) IsClBackendSupported(reasonIfUnsupported) +#endif + +#if ARMCOMPUTECL_ENABLED +template +inline bool IsWorkloadSupported(FuncType&& func, std::string* reasonIfUnsupported, Args&&... args) +{ + arm_compute::Status aclStatus = func(std::forward(args)...); + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + return supported; +} + +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); +#else +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) 
\ + return IsClBackendSupported(reasonIfUnsupported); +#endif + +} //namespace + +template +bool IsSupportedForDataTypeCl(std::string* reasonIfUnsupported, + DataType dataType, + FloatFunc floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... params) +{ + return IsClBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + floatFuncPtr, + floatFuncPtr, + uint8FuncPtr, + std::forward(params)...); +} + +bool IsActivationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClActivationWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + +bool IsAdditionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return FORWARD_CL_LAYER_SUPPORT_FUNC(ClAdditionValidate(input0, + input1, + output, + reasonIfUnsupported)); +} + +bool IsBatchNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchNormalizationValidate, + reasonIfUnsupported, + input, + output, + mean, + var, + beta, + gamma, + descriptor); +} + +bool IsConstantSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) +{ + bool isSupported = false; + + bool strideXIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideX); + bool strideXIsThree = IsMatchingStride<3>(desc.m_StrideX); + + bool strideYIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideY); + bool strideYIsThree = IsMatchingStride<3>(desc.m_StrideY); + + bool strideIsOneOrTwo = strideXIsOneOrTwo && strideYIsOneOrTwo; + bool strideIsOneOrTwoOrThree = ( strideXIsOneOrTwo || strideXIsThree ) && ( strideYIsOneOrTwo || strideYIsThree ); + + // 1x1 convolution with strides of 1,2,3. + isSupported |= IsMatchingSize2d<1>(weightInfo) && ( strideIsOneOrTwoOrThree ); + + // 3x3 convolution with strides of 1,2. + isSupported |= IsMatchingSize2d<3>(weightInfo) && ( strideIsOneOrTwo ); + + // 5x5 convolution with strides of 1,2 + isSupported |= IsMatchingSize2d<5>(weightInfo) && ( strideIsOneOrTwo ); + + //Fall back to normal convolution for the asymmetric padding case. + if (desc.m_PadLeft != desc.m_PadRight || + desc.m_PadTop != desc.m_PadBottom) + { + //Direct convolution does not support asymmetric padding yet. 
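+        // For example, a 3x3 kernel with stride 2 over an even-sized input and
+        // "same"-style padding typically needs m_PadLeft == 0 but m_PadRight == 1
+        // (and likewise top/bottom), so such a convolution takes this branch and
+        // falls back to the generic convolution path.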
+ isSupported = false; + } + + return isSupported; +} + +bool IsDirectConvolution2dParamsSupportedCl(std::string* reasonIfUnsupported, + const Convolution2dDescriptor& parameters, + const TensorInfo& weightInfo) +{ + return IsClDirectConvolution2dSupported(weightInfo, parameters); +} + +bool IsConvolution2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvolution2dWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClDepthwiseConvolutionWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDivisionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsSubtractionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return FORWARD_CL_LAYER_SUPPORT_FUNC(ClSubtractionValidate(input0, + input1, + output, + reasonIfUnsupported)); +} + +bool IsFullyConnectedSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClFullyConnectedWorkloadValidate, + reasonIfUnsupported, + input, + output, + weights, + biases, + descriptor); +} + +bool IsInputSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output); +} + +bool IsMergerSupportedCl(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsMultiplicationSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsOutputSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPermuteSupportedCl(const TensorInfo& input, + const 
TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(output); + FORWARD_WORKLOAD_VALIDATE_FUNC(ClPermuteWorkloadValidate, reasonIfUnsupported, descriptor); +} + +bool IsPooling2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsResizeBilinearSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsSoftmaxSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + FORWARD_WORKLOAD_VALIDATE_FUNC(ClSoftmaxWorkloadValidate, reasonIfUnsupported, input, output); +} + +bool IsSplitterSupportedCl(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedCl(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(descriptor); + return false; +} + +bool IsReshapeSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + return true; +} + +bool IsFloorSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsClBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &FalseFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClLstmFloatWorkloadValidate, reasonIfUnsupported, + input, outputStateIn, cellStateIn, scratchBuffer, outputStateOut, cellStateOut, + output, descriptor, inputToForgetWeights, inputToCellWeights, + inputToOutputWeights, recurrentToForgetWeights, + recurrentToCellWeights, recurrentToOutputWeights, + forgetGateBias, cellBias, outputGateBias, + inputToInputWeights, recurrentToInputWeights, + cellToInputWeights, inputGateBias, projectionWeights, + projectionBias, cellToForgetWeights, cellToOutputWeights); +} + +bool IsConvertFp16ToFp32SupportedCl(const 
TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp16ToFp32WorkloadValidate, + reasonIfUnsupported, + input, + output, + reasonIfUnsupported); +} + +bool IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp32ToFp16WorkloadValidate, + reasonIfUnsupported, + input, + output, + reasonIfUnsupported); +} + +bool IsMeanSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return false; +} + +} diff --git a/src/backends/ClLayerSupport.hpp b/src/backends/ClLayerSupport.hpp new file mode 100644 index 0000000000..f5c1226e56 --- /dev/null +++ b/src/backends/ClLayerSupport.hpp @@ -0,0 +1,158 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include + +#include + +namespace armnn +{ +bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc); +bool IsClDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, + const DepthwiseConvolution2dDescriptor& parameters, + const TensorInfo& weights); + +bool IsActivationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsAdditionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsBatchNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDivisionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsSubtractionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsL2NormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const 
TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedCl(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPooling2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedCl(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedCl(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsMeanSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp16ToFp32SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +} diff --git a/src/backends/ClTensorHandle.hpp b/src/backends/ClTensorHandle.hpp new file mode 100644 index 0000000000..9c78192284 --- /dev/null +++ b/src/backends/ClTensorHandle.hpp @@ -0,0 +1,136 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include "OutputHandler.hpp" +#include "ArmComputeTensorUtils.hpp" + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnn +{ + + +class IClTensorHandle : public ITensorHandle +{ +public: + virtual arm_compute::ICLTensor& GetTensor() = 0; + virtual arm_compute::ICLTensor const& GetTensor() const = 0; + virtual arm_compute::DataType GetDataType() const = 0; + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) = 0; +}; + +class ClTensorHandle : public IClTensorHandle +{ +public: + ClTensorHandle(const TensorInfo& tensorInfo) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); + } + + arm_compute::CLTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; } + virtual void Allocate() override {armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);} + + virtual void Manage() override + { + assert(m_MemoryGroup != nullptr); + m_MemoryGroup->manage(&m_Tensor); + } + + virtual const void* Map(bool blocking = true) const override + { + const_cast(&m_Tensor)->map(blocking); + return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override { const_cast(&m_Tensor)->unmap(); } + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) override + { + m_MemoryGroup = boost::polymorphic_pointer_downcast(memoryGroup); + } + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } +private: + arm_compute::CLTensor m_Tensor; + std::shared_ptr m_MemoryGroup; +}; + +class ClSubTensorHandle : public IClTensorHandle +{ +public: + ClSubTensorHandle(IClTensorHandle* parent, + const arm_compute::TensorShape& shape, + const arm_compute::Coordinates& coords) + : m_Tensor(&parent->GetTensor(), shape, coords) + { + parentHandle = parent; + } + + arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual const void* Map(bool blocking = true) const override + { + const_cast(&m_Tensor)->map(blocking); + return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override { const_cast(&m_Tensor)->unmap(); } + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } + + virtual ITensorHandle* GetParent() const override { return parentHandle; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr&) override {} + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + +private: + mutable 
arm_compute::CLSubTensor m_Tensor; + ITensorHandle* parentHandle = nullptr; + +}; + +} diff --git a/src/backends/ClWorkloadFactory.cpp b/src/backends/ClWorkloadFactory.cpp new file mode 100644 index 0000000000..591fb85dbb --- /dev/null +++ b/src/backends/ClWorkloadFactory.cpp @@ -0,0 +1,490 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "ClWorkloadFactory.hpp" + +#include "armnn/Exceptions.hpp" +#include "armnn/Utils.hpp" + +#include +#include "CpuTensorHandle.hpp" +#include "Layer.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include +#include +#include + +#include "ClWorkloads.hpp" + +#include "backends/MemCopyWorkload.hpp" +#include "backends/ClTensorHandle.hpp" + +#include "memory/IPoolManager.hpp" +#endif + +#include "MakeWorkloadHelper.hpp" + +#include +#include +#include + +namespace armnn +{ + +bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, + boost::optional dataType, + std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::GpuAcc, layer, dataType, outReasonIfUnsupported); +} + +#ifdef ARMCOMPUTECL_ENABLED + +ClWorkloadFactory::ClWorkloadFactory() +: m_MemoryManager(std::make_unique()) +{ +} + +std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + std::unique_ptr tensorHandle = std::make_unique(tensorInfo); + tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); + + return tensorHandle; +} + +std::unique_ptr ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + BOOST_ASSERT(parent.GetType() == ITensorHandle::CL); + + arm_compute::Coordinates coords; + arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); + + coords.set_num_dimensions(subTensorShape.GetNumDimensions()); + for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) + { + // Arm compute indexes tensor coords in reverse order. 
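+        // For example, with a (typically NCHW) armnn origin of { n, c, h, w } the
+        // resulting ACL coordinate is (w, h, c, n): dimension i is read from
+        // position (GetNumDimensions() - 1 - i) of subTensorOrigin.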
+ unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; + coords.set(i, boost::numeric_cast(subTensorOrigin[revertedIndex])); + } + + return std::make_unique( + boost::polymorphic_downcast(&parent), shape, coords); +} + +std::unique_ptr ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr ClWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload, + ClAdditionWorkload>(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload, + ClSubtractionWorkload>(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr 
ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) + { + throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload"); + } + + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateResizeBilinear( + const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +void ClWorkloadFactory::Finalize() +{ + m_MemoryManager.Finalize(); +} + +void ClWorkloadFactory::Release() +{ + m_MemoryManager.Release(); +} + +void ClWorkloadFactory::Acquire() +{ + m_MemoryManager.Acquire(); +} + +#else // #if ARMCOMPUTECL_ENABLED + +ClWorkloadFactory::ClWorkloadFactory() +{ +} + +std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr 
ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr 
ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +void ClWorkloadFactory::Finalize() +{ +} + +void ClWorkloadFactory::Release() +{ +} + +void ClWorkloadFactory::Acquire() +{ +} + +#endif // #if ARMCOMPUTECL_ENABLED + +} // namespace armnn diff --git a/src/backends/ClWorkloadFactory.hpp b/src/backends/ClWorkloadFactory.hpp new file mode 100644 index 0000000000..892d564fbb --- /dev/null +++ b/src/backends/ClWorkloadFactory.hpp @@ -0,0 +1,136 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "OutputHandler.hpp" + +#include "armnn/IRuntime.hpp" +#include + +#include "memory/BaseMemoryManager.hpp" + +namespace armnn +{ + +// ARM Compute OpenCL workload factory. +class ClWorkloadFactory : public IWorkloadFactory +{ +public: + ClWorkloadFactory(); + + virtual Compute GetCompute() const override { return Compute::GpuAcc; } + + static bool IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const override { return true; } + + virtual std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override; + + virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMultiplication(const MultiplicationQueueDescriptor& 
descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& Info) const override; + + virtual std::unique_ptr CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual void Finalize() override; + + virtual void Release() override; + + virtual void Acquire() override; + +private: + +#ifdef ARMCOMPUTECL_ENABLED + mutable ClMemoryManager m_MemoryManager; +#endif +}; + +} // namespace armnn diff --git a/src/backends/ClWorkloads.hpp b/src/backends/ClWorkloads.hpp new file mode 100644 index 0000000000..2bbda8a62b --- /dev/null +++ b/src/backends/ClWorkloads.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once +#include "backends/ClWorkloads/ClActivationFloatWorkload.hpp" +#include "backends/ClWorkloads/ClActivationUint8Workload.hpp" +#include "backends/ClWorkloads/ClAdditionWorkload.hpp" +#include "backends/ClWorkloads/ClBaseConstantWorkload.hpp" +#include "backends/ClWorkloads/ClBaseMergerWorkload.hpp" +#include "backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp" +#include "backends/ClWorkloads/ClConstantFloatWorkload.hpp" +#include "backends/ClWorkloads/ClConstantUint8Workload.hpp" +#include "backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp" +#include "backends/ClWorkloads/ClConvolution2dUint8Workload.hpp" +#include "backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp" +#include "backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp" +#include "backends/ClWorkloads/ClDivisionFloatWorkload.hpp" +#include "backends/ClWorkloads/ClFloorFloatWorkload.hpp" +#include "backends/ClWorkloads/ClFullyConnectedWorkload.hpp" +#include "backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp" +#include "backends/ClWorkloads/ClLstmFloatWorkload.hpp" +#include "backends/ClWorkloads/ClMergerFloatWorkload.hpp" +#include "backends/ClWorkloads/ClMergerUint8Workload.hpp" +#include "backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp" +#include "backends/ClWorkloads/ClNormalizationFloatWorkload.hpp" +#include "backends/ClWorkloads/ClPermuteWorkload.hpp" +#include "backends/ClWorkloads/ClPooling2dFloatWorkload.hpp" +#include "backends/ClWorkloads/ClPooling2dUint8Workload.hpp" +#include "backends/ClWorkloads/ClReshapeFloatWorkload.hpp" +#include "backends/ClWorkloads/ClReshapeUint8Workload.hpp" +#include "backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp" +#include "backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp" +#include "backends/ClWorkloads/ClSoftmaxUint8Workload.hpp" +#include "backends/ClWorkloads/ClSplitterFloatWorkload.hpp" +#include "backends/ClWorkloads/ClSplitterUint8Workload.hpp" +#include "backends/ClWorkloads/ClSubtractionWorkload.hpp" +#include "backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp" +#include "backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp" diff --git a/src/backends/ClWorkloads/CMakeLists.txt b/src/backends/ClWorkloads/CMakeLists.txt new file mode 100644 index 0000000000..ac935b5cf7 --- /dev/null +++ b/src/backends/ClWorkloads/CMakeLists.txt @@ -0,0 +1,89 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +list(APPEND armnnClBackend_sources + ClActivationFloatWorkload.cpp + ClActivationFloatWorkload.hpp + ClActivationUint8Workload.cpp + ClActivationUint8Workload.hpp + ClAdditionWorkload.cpp + ClAdditionWorkload.hpp + ClBaseConstantWorkload.cpp + ClBaseConstantWorkload.hpp + ClBaseMergerWorkload.hpp + ClBaseSplitterWorkload.hpp + ClBatchNormalizationFloatWorkload.cpp + ClBatchNormalizationFloatWorkload.hpp + ClConstantFloatWorkload.cpp + ClConstantFloatWorkload.hpp + ClConstantUint8Workload.cpp + ClConstantUint8Workload.hpp + ClConvertFp16ToFp32Workload.cpp + ClConvertFp16ToFp32Workload.hpp + ClConvertFp32ToFp16Workload.cpp + ClConvertFp32ToFp16Workload.hpp + ClConvolution2dBaseWorkload.cpp + ClConvolution2dBaseWorkload.hpp + ClConvolution2dFloatWorkload.cpp + ClConvolution2dFloatWorkload.hpp + ClConvolution2dUint8Workload.cpp + ClConvolution2dUint8Workload.hpp + ClDepthwiseConvolutionBaseWorkload.cpp + ClDepthwiseConvolutionBaseWorkload.hpp + ClDepthwiseConvolutionFloatWorkload.cpp + ClDepthwiseConvolutionFloatWorkload.hpp + ClDepthwiseConvolutionUint8Workload.cpp + ClDepthwiseConvolutionUint8Workload.hpp + ClDivisionFloatWorkload.cpp + ClDivisionFloatWorkload.hpp + ClFloorFloatWorkload.cpp + ClFloorFloatWorkload.hpp + ClFullyConnectedWorkload.cpp + ClFullyConnectedWorkload.hpp + ClL2NormalizationFloatWorkload.cpp + ClL2NormalizationFloatWorkload.hpp + ClLstmFloatWorkload.cpp + ClLstmFloatWorkload.hpp + ClMergerFloatWorkload.cpp + ClMergerFloatWorkload.hpp + ClMergerUint8Workload.cpp + ClMergerUint8Workload.hpp + ClMultiplicationFloatWorkload.cpp + ClMultiplicationFloatWorkload.hpp + ClNormalizationFloatWorkload.cpp + ClNormalizationFloatWorkload.hpp + ClPermuteWorkload.cpp + ClPermuteWorkload.hpp + ClPooling2dBaseWorkload.cpp + ClPooling2dBaseWorkload.hpp + ClPooling2dFloatWorkload.cpp + ClPooling2dFloatWorkload.hpp + ClPooling2dUint8Workload.cpp + ClPooling2dUint8Workload.hpp + ClReshapeFloatWorkload.cpp + ClReshapeFloatWorkload.hpp + ClReshapeUint8Workload.cpp + ClReshapeUint8Workload.hpp + ClResizeBilinearFloatWorkload.cpp + ClResizeBilinearFloatWorkload.hpp + ClSoftmaxBaseWorkload.cpp + ClSoftmaxBaseWorkload.hpp + ClSoftmaxFloatWorkload.cpp + ClSoftmaxFloatWorkload.hpp + ClSoftmaxUint8Workload.cpp + ClSoftmaxUint8Workload.hpp + ClSplitterFloatWorkload.cpp + ClSplitterFloatWorkload.hpp + ClSplitterUint8Workload.cpp + ClSplitterUint8Workload.hpp + ClSubtractionWorkload.cpp + ClSubtractionWorkload.hpp + ClWorkloadUtils.hpp +) + +add_library(armnnClBackend STATIC ${armnnClBackend_sources}) +target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/ClWorkloads/ClActivationFloatWorkload.cpp b/src/backends/ClWorkloads/ClActivationFloatWorkload.cpp new file mode 100644 index 0000000000..97078bddd8 --- /dev/null +++ b/src/backends/ClWorkloads/ClActivationFloatWorkload.cpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClActivationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); + + if (input.GetDataType() == DataType::QuantisedAsymm8 && + activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "CL: Logistic Activations unsupported with QAsymm8 data type."}; + } + + return arm_compute::CLActivationLayer::validate(&aclInput, + &aclOutput, + activationLayerInfo); +} + +ClActivationFloatWorkload::ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClActivationFloatWorkload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void ClActivationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationFloatWorkload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClActivationFloatWorkload.hpp b/src/backends/ClWorkloads/ClActivationFloatWorkload.hpp new file mode 100644 index 0000000000..e1b6fe13d8 --- /dev/null +++ b/src/backends/ClWorkloads/ClActivationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ +arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor); + +// Activation layer execution. +class ClActivationFloatWorkload : public FloatWorkload +{ +public: + ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLActivationLayer m_ActivationLayer; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClActivationUint8Workload.cpp b/src/backends/ClWorkloads/ClActivationUint8Workload.cpp new file mode 100644 index 0000000000..f39c856aa9 --- /dev/null +++ b/src/backends/ClWorkloads/ClActivationUint8Workload.cpp @@ -0,0 +1,44 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClActivationUint8Workload.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "backends/ArmComputeUtils.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function); + arm_compute::ActivationLayerInfo layerInfo(activation, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, layerInfo); +} + +void ClActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationUint8Workload_Execute"); + + m_ActivationLayer.run(); +} + +} //namespace Armnn + + diff --git a/src/backends/ClWorkloads/ClActivationUint8Workload.hpp b/src/backends/ClWorkloads/ClActivationUint8Workload.hpp new file mode 100644 index 0000000000..bb2ff58853 --- /dev/null +++ b/src/backends/ClWorkloads/ClActivationUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +// Activation layer execution. +class ClActivationUint8Workload : public Uint8Workload +{ +public: + ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLActivationLayer m_ActivationLayer; +}; + +} //namespace armnn + + + diff --git a/src/backends/ClWorkloads/ClAdditionWorkload.cpp b/src/backends/ClWorkloads/ClAdditionWorkload.cpp new file mode 100644 index 0000000000..dd439d59a9 --- /dev/null +++ b/src/backends/ClWorkloads/ClAdditionWorkload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
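+// Note: ClAdditionValidate below reduces the Compute Library status to a bool and, when validation
+// fails, copies the error description into the optional reason string. A minimal caller-side sketch
+// (input0Info, input1Info, outputInfo and reason are placeholder names):
+//
+//     std::string reason;
+//     if (!ClAdditionValidate(input0Info, input1Info, outputInfo, &reason))
+//     {
+//         // addition is not supported on CL; 'reason' carries the Compute Library error description
+//     }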
+// SPDX-License-Identifier: MIT +// + +#include "ClAdditionWorkload.hpp" + +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +template +ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(this->m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); +} + +template +void ClAdditionWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute"); + m_Layer.run(); +} + +bool ClAdditionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info, + &aclInput1Info, + &aclOutputInfo, + g_AclConvertPolicy); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return supported; +} + +} //namespace armnn + +template class armnn::ClAdditionWorkload; +template class armnn::ClAdditionWorkload; diff --git a/src/backends/ClWorkloads/ClAdditionWorkload.hpp b/src/backends/ClWorkloads/ClAdditionWorkload.hpp new file mode 100644 index 0000000000..b4706890d1 --- /dev/null +++ b/src/backends/ClWorkloads/ClAdditionWorkload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +template +class ClAdditionWorkload : public TypedWorkload +{ +public: + ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLArithmeticAddition m_Layer; +}; + +bool ClAdditionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported); +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseConstantWorkload.cpp b/src/backends/ClWorkloads/ClBaseConstantWorkload.cpp new file mode 100644 index 0000000000..021d17512f --- /dev/null +++ b/src/backends/ClWorkloads/ClBaseConstantWorkload.cpp @@ -0,0 +1,64 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClBaseConstantWorkload.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "Half.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +template class ClBaseConstantWorkload; +template class ClBaseConstantWorkload; + +template +void ClBaseConstantWorkload::Execute() const +{ + // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data + // on the first inference, then reused for subsequent inferences. + // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not + // have been configured at the time. + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::CLTensor& output = static_cast(data.m_Outputs[0])->GetTensor(); + arm_compute::DataType computeDataType = static_cast(data.m_Outputs[0])->GetDataType(); + + switch (computeDataType) + { + case arm_compute::DataType::F16: + { + CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + case arm_compute::DataType::F32: + { + CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + case arm_compute::DataType::QASYMM8: + { + CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } +} + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseConstantWorkload.hpp b/src/backends/ClWorkloads/ClBaseConstantWorkload.hpp new file mode 100644 index 0000000000..ca1db389dc --- /dev/null +++ b/src/backends/ClWorkloads/ClBaseConstantWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ +template +class ClBaseConstantWorkload : public TypedWorkload +{ +public: + ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload(descriptor, info) + , m_RanOnce(false) + { + } + + void Execute() const override; + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseMergerWorkload.hpp b/src/backends/ClWorkloads/ClBaseMergerWorkload.hpp new file mode 100644 index 0000000000..420e074217 --- /dev/null +++ b/src/backends/ClWorkloads/ClBaseMergerWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +// Base class template providing an implementation of the Merger layer common to all data types. +template +class ClBaseMergerWorkload : public TypedWorkload +{ +public: + using TypedWorkload::TypedWorkload; + + void Execute() const override + { + // With subtensors, merger is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp b/src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp new file mode 100644 index 0000000000..41f382cac8 --- /dev/null +++ b/src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +// Base class template providing an implementation of the Splitter layer common to all data types. +template +class ClBaseSplitterWorkload : public TypedWorkload +{ +public: + using TypedWorkload::TypedWorkload; + + void Execute() const override + { + // With subtensors, merger is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..021734aaa6 --- /dev/null +++ b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp @@ -0,0 +1,96 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClBatchNormalizationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor &desc) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); + const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); + const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); + const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); + + return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo, + &aclOutputInfo, + &aclMeanInfo, + &aclVarInfo, + &aclBetaInfo, + &aclGammaInfo, + desc.m_Eps); +} + +ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Mean = std::make_unique(); + BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); + + m_Variance = std::make_unique(); + BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); + + m_Gamma = std::make_unique(); + BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); + + m_Beta = std::make_unique(); + BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Data.ValidateInputsOutputs("ClBatchNormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, + &output, + m_Mean.get(), + m_Variance.get(), + m_Beta.get(), + m_Gamma.get(), + m_Data.m_Parameters.m_Eps); + + InitializeArmComputeClTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean); + InitializeArmComputeClTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance); + InitializeArmComputeClTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta); + InitializeArmComputeClTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma); + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_Layer.prepare(); + FreeUnusedTensors(); +} + +void 
ClBatchNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +void ClBatchNormalizationFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_Mean); + FreeTensorIfUnused(m_Variance); + FreeTensorIfUnused(m_Gamma); + FreeTensorIfUnused(m_Beta); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..22c71b1073 --- /dev/null +++ b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& desc); + +class ClBatchNormalizationFloatWorkload : public FloatWorkload +{ +public: + ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLBatchNormalizationLayer m_Layer; + + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Gamma; + std::unique_ptr m_Beta; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + + diff --git a/src/backends/ClWorkloads/ClConstantFloatWorkload.cpp b/src/backends/ClWorkloads/ClConstantFloatWorkload.cpp new file mode 100644 index 0000000000..1565047c22 --- /dev/null +++ b/src/backends/ClWorkloads/ClConstantFloatWorkload.cpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConstantFloatWorkload.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClConstantFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantFloatWorkload_Execute"); + ClBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConstantFloatWorkload.hpp b/src/backends/ClWorkloads/ClConstantFloatWorkload.hpp new file mode 100644 index 0000000000..0cbeaad9ea --- /dev/null +++ b/src/backends/ClWorkloads/ClConstantFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseConstantWorkload.hpp" + +namespace armnn +{ +class ClConstantFloatWorkload : public ClBaseConstantWorkload +{ +public: + using ClBaseConstantWorkload::ClBaseConstantWorkload; + void Execute() const override; +}; + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConstantUint8Workload.cpp b/src/backends/ClWorkloads/ClConstantUint8Workload.cpp new file mode 100644 index 0000000000..a5ef0321cd --- /dev/null +++ b/src/backends/ClWorkloads/ClConstantUint8Workload.cpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClConstantUint8Workload.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantUint8Workload_Execute"); + ClBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConstantUint8Workload.hpp b/src/backends/ClWorkloads/ClConstantUint8Workload.hpp new file mode 100644 index 0000000000..30556dc0d6 --- /dev/null +++ b/src/backends/ClWorkloads/ClConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseConstantWorkload.hpp" + +namespace armnn +{ + +class ClConstantUint8Workload : public ClBaseConstantWorkload +{ +public: + using ClBaseConstantWorkload::ClBaseConstantWorkload; + void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..534249aeac --- /dev/null +++ b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvertFp16ToFp32Workload.hpp" +#include "backends/ClTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload( + const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) : + Float16ToFloat32Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); +} + +void ClConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute"); + m_Layer.run(); +} + +arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + if (input.GetDataType() != DataType::Float16) + { + *reasonIfUnsupported = "Input should be Float16"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + if (output.GetDataType() != DataType::Float32) + { + *reasonIfUnsupported = "Output should be Float32"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( + &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return aclStatus; +} + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..c72d2262b3 --- /dev/null +++ 
b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload +{ +public: + + ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::CLDepthConvertLayer m_Layer; +}; + +arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported); + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..73b3cbc542 --- /dev/null +++ b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvertFp32ToFp16Workload.hpp" +#include "backends/ClTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload( + const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) : + Float32ToFloat16Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); +} + +void ClConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute"); + m_Layer.run(); +} + +arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + if (input.GetDataType() != DataType::Float32) + { + *reasonIfUnsupported = "Input should be Float32"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + if (output.GetDataType() != DataType::Float16) + { + *reasonIfUnsupported = "Output should be Float16"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( + &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return aclStatus; +} + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..fb6af02070 --- /dev/null +++ b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
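+// Note: ClConvertFp32ToFp16WorkloadValidate (declared below) writes to reasonIfUnsupported without a
+// null check when the input is not Float32 or the output is not Float16, so callers should always
+// pass a valid std::string pointer. A minimal caller-side sketch (inputInfo and outputInfo are
+// placeholder names):
+//
+//     std::string reason;
+//     arm_compute::Status status = ClConvertFp32ToFp16WorkloadValidate(inputInfo, outputInfo, &reason);
+//     const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);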
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload +{ +public: + + ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::CLDepthConvertLayer m_Layer; +}; + +arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported); + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp new file mode 100644 index 0000000000..228f17d54e --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dBaseWorkload.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get()); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); + + return arm_compute::CLConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + layerInfo); +} + +} diff --git a/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp new file mode 100644 index 0000000000..a983dba79a --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases); + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp new file mode 100644 index 0000000000..029f41d5dc --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp @@ -0,0 +1,81 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
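+// Note: ClConvolution2dWorkloadValidate (ClConvolution2dBaseWorkload.hpp) only inspects the biases
+// argument when descriptor.m_BiasEnabled is set, so a bias-free caller can pass an empty optional.
+// A minimal caller-side sketch (placeholder names throughout; the optional is assumed to be
+// boost::optional<TensorInfo>, matching the validate declaration):
+//
+//     boost::optional<TensorInfo> biases;          // left empty when the layer has no bias
+//     if (convDesc.m_BiasEnabled)
+//     {
+//         biases = biasInfo;
+//     }
+//     arm_compute::Status status =
+//         ClConvolution2dWorkloadValidate(inputInfo, outputInfo, convDesc, weightInfo, biases);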
+// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dFloatWorkload::ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_ConvolutionLayer(memoryManager) +{ + + // todo: check tensor shapes match. + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ConvolutionLayer.configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + + InitializeArmComputeClTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_ConvolutionLayer.prepare(); + FreeUnusedTensors(); +} + +void ClConvolution2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dFloat32Workload_Execute"); + + m_ConvolutionLayer.run(); +} + +void ClConvolution2dFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp new file mode 100644 index 0000000000..28ba53f38a --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
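+// Note on the lifecycle used by ClConvolution2dFloatWorkload (see the .cpp earlier in this patch):
+// configure() wires the ACL tensors, the kernel/bias CLTensors are then filled from the ArmNN
+// constant tensors, prepare() lets the Compute Library perform its internal copying and reshaping,
+// and FreeUnusedTensors() releases the staging tensors; Execute() only calls run(). As a sketch of
+// the ordering, using the member names from the .cpp:
+//
+//     m_ConvolutionLayer.configure(&input, m_KernelTensor.get(), m_BiasTensor.get(), &output, padStrideInfo);
+//     // ...initialise m_KernelTensor / m_BiasTensor from the constant tensors in m_Data...
+//     m_ConvolutionLayer.prepare();
+//     FreeUnusedTensors();
+//     m_ConvolutionLayer.run();    // per Execute()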
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include +#include + +#include + +namespace armnn +{ + +class ClConvolution2dFloatWorkload : public FloatWorkload +{ +public: + ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..e6783b698a --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp @@ -0,0 +1,81 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dUint8Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : Uint8Workload(descriptor, info) + , m_ConvolutionLayer(memoryManager) +{ + // todo: check tensor shapes match + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ConvolutionLayer.configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + + InitialiseArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor()); + + if (m_BiasTensor) + { + InitialiseArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor()); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_ConvolutionLayer.prepare(); + FreeUnusedTensors(); +} + +void ClConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dUint8Workload_Execute"); + + m_ConvolutionLayer.run(); +} + +void ClConvolution2dUint8Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..f1f008b1b9 --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp @@ -0,0 +1,35 @@ +// +// Copyright 
© 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include +#include + +#include + +namespace armnn +{ + +class ClConvolution2dUint8Workload : public Uint8Workload +{ +public: + ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp new file mode 100644 index 0000000000..0e89a68118 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp @@ -0,0 +1,125 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +#include "TypeUtils.hpp" + +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get()); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + aclPadStrideInfo, + aclDepthMultiplier); +} + +template +ClDepthwiseConvolutionBaseWorkload::ClDepthwiseConvolutionBaseWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + std::string name = std::string("ClDepthwiseConvolution") + + GetDataTypeName(m_Data.m_Weight->GetTensorInfo().GetDataType()) + "Workload"; + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + 
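+    // The branch a few lines below checks dimensions 2 and 3 of the weight shape: when the kernel is
+    // 3x3 the constructor instantiates the Compute Library's dedicated 3x3 depthwise function,
+    // otherwise it falls back to the generic CLDepthwiseConvolutionLayer. Both are owned through the
+    // common m_DepthwiseConvolutionLayer pointer, so Execute() in the derived workloads and
+    // FreeUnusedTensors() are independent of which kernel was selected.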
arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + const unsigned int depthMultiplier = weightInfo.GetShape()[0]; + + //Check for optimisation opportunities. + bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3); + if (use3x3Optimisation) + { + m_DepthwiseConvolutionLayer = std::make_unique(); + static_cast(m_DepthwiseConvolutionLayer.get())->configure( + &input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo, + depthMultiplier); + } + else + { + m_DepthwiseConvolutionLayer = std::make_unique(); + static_cast(m_DepthwiseConvolutionLayer.get())->configure( + &input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo, + depthMultiplier); + } + + BOOST_ASSERT(m_DepthwiseConvolutionLayer); +} + +template +void ClDepthwiseConvolutionBaseWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +// Generate known implementations for linker +template class ClDepthwiseConvolutionBaseWorkload; +template class ClDepthwiseConvolutionBaseWorkload; + +} // namespace armnn diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp new file mode 100644 index 0000000000..49a8b5d357 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases); + +template +class ClDepthwiseConvolutionBaseWorkload : public TypedWorkload +{ +public: + using TypedWorkload::m_Data; + + ClDepthwiseConvolutionBaseWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + +protected: + std::unique_ptr m_DepthwiseConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp new file mode 100644 index 0000000000..635ae1f327 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionFloatWorkload.hpp" + +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClDepthwiseConvolutionFloatWorkload::ClDepthwiseConvolutionFloatWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClDepthwiseConvolutionBaseWorkload(descriptor, info) +{ + InitializeArmComputeClTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_DepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void ClDepthwiseConvolutionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionFloatWorkload_Execute"); + BOOST_ASSERT(m_DepthwiseConvolutionLayer); + + m_DepthwiseConvolutionLayer->run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp new file mode 100644 index 0000000000..4f9d5f332e --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +namespace armnn +{ + +class ClDepthwiseConvolutionFloatWorkload : public ClDepthwiseConvolutionBaseWorkload +{ +public: + ClDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; +}; + +} //namespace armnn + + + + diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..af5836e908 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionUint8Workload.hpp" + +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClDepthwiseConvolutionBaseWorkload(descriptor, info) +{ + InitialiseArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + + if (m_BiasTensor) + { + InitialiseArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor()); + } + + m_DepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void ClDepthwiseConvolutionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionUint8Workload_Execute"); + BOOST_ASSERT(m_DepthwiseConvolutionLayer); + + m_DepthwiseConvolutionLayer->run(); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp new file mode 100644 index 0000000000..b9f676de94 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp @@ -0,0 +1,23 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
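+// Note: the quantised depthwise workload (see the .cpp above) fills the staged kernel/bias tensors
+// directly from the constant tensors via GetConstTensor(), whereas the float workload goes through
+// InitializeArmComputeClTensorDataForFloatTypes, which is presumably where the FP16/FP32 distinction
+// is handled. Both then call prepare() and FreeUnusedTensors() in their constructors, so Execute()
+// is reduced to running the already-prepared layer.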
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +namespace armnn +{ + +class ClDepthwiseConvolutionUint8Workload : public ClDepthwiseConvolutionBaseWorkload +{ +public: + ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp b/src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp new file mode 100644 index 0000000000..2371789035 --- /dev/null +++ b/src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDivisionFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput); +} + + +ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + // Construct + m_ArithmeticDivision.configure(&input0, &input1, &output); +} + +void ClDivisionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute"); + + // Executes the layer. + m_ArithmeticDivision.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp b/src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp new file mode 100644 index 0000000000..d34e11dab8 --- /dev/null +++ b/src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class ClDivisionFloatWorkload : public FloatWorkload +{ +public: + ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const + WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClFloorFloatWorkload.cpp b/src/backends/ClWorkloads/ClFloorFloatWorkload.cpp new file mode 100644 index 0000000000..d090a7da81 --- /dev/null +++ b/src/backends/ClWorkloads/ClFloorFloatWorkload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClFloorFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void ClFloorFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClFloorFloatWorkload.hpp b/src/backends/ClWorkloads/ClFloorFloatWorkload.hpp new file mode 100644 index 0000000000..f269bcf30c --- /dev/null +++ b/src/backends/ClWorkloads/ClFloorFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClFloorFloatWorkload : public FloatWorkload +{ +public: + ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLFloor m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp b/src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp new file mode 100644 index 0000000000..8d2fd0e909 --- /dev/null +++ b/src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp @@ -0,0 +1,111 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClFullyConnectedWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiases; + arm_compute::TensorInfo *optionalAclBiases = nullptr; + if (descriptor.m_BiasEnabled) + { + aclBiases = BuildArmComputeTensorInfo(biases); + optionalAclBiases = &aclBiases; + } + + const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + + return arm_compute::CLFullyConnectedLayer::validate(&aclInput, + &aclWeights, + optionalAclBiases, + &aclOutput, + fullyConnectedLayerInfo); +} + +ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : BaseWorkload(descriptor, info) + , m_FullyConnectedLayer(memoryManager) +{ + m_WeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasesTensor = 
std::make_unique(); + BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + } + + m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + // Construct + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + + // Allocate + if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8) + { + InitialiseArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight->GetConstTensor()); + } + else + { + InitializeArmComputeClTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); + } + + if (m_BiasesTensor) + { + if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32) + { + InitialiseArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias->GetConstTensor()); + } + else + { + InitializeArmComputeClTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); + } + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_FullyConnectedLayer.prepare(); + FreeUnusedTensors(); +} + +void ClFullyConnectedWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute"); + m_FullyConnectedLayer.run(); +} + +void ClFullyConnectedWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_WeightsTensor); + FreeTensorIfUnused(m_BiasesTensor); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp b/src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp new file mode 100644 index 0000000000..a61610992e --- /dev/null +++ b/src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor); + +class ClFullyConnectedWorkload : public armnn::BaseWorkload +{ +public: + ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor, + const armnn::WorkloadInfo& info, + std::shared_ptr& memoryManager); + + using armnn::BaseWorkload::m_Data; + void Execute() const override; + +private: + mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer; + + std::unique_ptr m_WeightsTensor; + std::unique_ptr m_BiasesTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..4ccaae3430 --- /dev/null +++ b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClL2NormalizationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + CreateAclNormalizationLayerInfoForL2Normalization(input); + + return arm_compute::CLNormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); +} + +void ClL2NormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + + + diff --git a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..f3f7de110a --- /dev/null +++ b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output); + +class ClL2NormalizationFloatWorkload : public FloatWorkload +{ +public: + ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + // Purposely not a CLL2Normalize function. See constructor. + mutable arm_compute::CLNormalizationLayer m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/ClWorkloads/ClLstmFloatWorkload.cpp b/src/backends/ClWorkloads/ClLstmFloatWorkload.cpp new file mode 100644 index 0000000000..09a34c2d02 --- /dev/null +++ b/src/backends/ClWorkloads/ClLstmFloatWorkload.cpp @@ -0,0 +1,408 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
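+// Note on the constructor below: the mandatory LSTM weights and biases are always staged into
+// CLTensors, while the optional parameter sets are only attached to the arm_compute LSTMParams when
+// the corresponding ArmNN option is active: set_cifg_params() when m_CifgEnabled is false (the input
+// gate exists), set_projection_params() when m_ProjectionEnabled is true, and set_peephole_params()
+// when m_PeepholeEnabled is true. The m_ActivationFunc integer is mapped onto an
+// arm_compute::ActivationLayerInfo (0 = none, 1 = RELU, 3 = BOUNDED_RELU, 4 = TANH, 6 = LOGISTIC);
+// any other value throws.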
+// SPDX-License-Identifier: MIT +// + +#include "ClLstmFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) + : FloatWorkload(descriptor, info) +{ + arm_compute::LSTMParams lstm_param; + + // Basic parameters + m_InputToForgetWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo()); + + m_InputToCellWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo()); + + m_InputToOutputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo()); + + m_RecurrentToForgetWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo()); + + m_RecurrentToCellWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo()); + + m_RecurrentToOutputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo()); + + m_ForgetGateBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo()); + + m_CellBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo()); + + m_OutputGateBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo()); + + // for future reference: check the AndroidNN API for the logic here + if (!m_Data.m_Parameters.m_CifgEnabled) + { + m_InputToInputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo()); + + m_RecurrentToInputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo()); + + m_CellToInputWeightsTensor = std::make_unique(); + if (m_Data.m_CellToInputWeights != nullptr) + { + BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo()); + } + + m_InputGateBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo()); + + lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(), + m_RecurrentToInputWeightsTensor.get(), + m_Data.m_CellToInputWeights != nullptr ? m_CellToInputWeightsTensor.get() : nullptr, + m_InputGateBiasTensor.get()); + } + + if (m_Data.m_Parameters.m_ProjectionEnabled) + { + m_ProjectionWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo()); + + m_ProjectionBiasTensor = std::make_unique(); + if (m_Data.m_ProjectionBias != nullptr) + { + BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo()); + } + + lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(), + m_Data.m_ProjectionBias != nullptr ? 
+    if (m_Data.m_Parameters.m_PeepholeEnabled)
+    {
+        m_CellToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+        BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo());
+
+        m_CellToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+        BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo());
+
+        lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get());
+    }
+
+    const arm_compute::ICLTensor& input           = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    const arm_compute::ICLTensor& output_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    const arm_compute::ICLTensor& cell_state_in   = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
+
+    arm_compute::ICLTensor& output_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor();
+    arm_compute::ICLTensor& cell_state_out   = static_cast<IClTensorHandle*>(m_Data.m_Outputs[2])->GetTensor();
+    arm_compute::ICLTensor& output           = static_cast<IClTensorHandle*>(m_Data.m_Outputs[3])->GetTensor();
+
+    // Get the batch_size and the num_units from the cellStateIn dimensions
+    const TensorInfo& inputTensorInfo = info.m_InputTensorInfos[2];
+    const unsigned int batch_size = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[0]);
+    const unsigned int num_units  = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[1]);
+
+    m_ScratchBuffer = std::make_unique<arm_compute::CLTensor>();
+    if (m_Data.m_Parameters.m_CifgEnabled)
+    {
+        // 2D tensor with dimensions [num_units * 4, batch_size] with CIFG
+        armnn::TensorInfo scratchBuffer1({ batch_size, num_units * 4 }, DataType::Float32);
+        BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer1);
+    }
+    else
+    {
+        // scratch_buffer [num_units * 3, batch_size] without CIFG
+        armnn::TensorInfo scratchBuffer2({ batch_size, num_units * 3 }, DataType::Float32);
+        BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer2);
+    }
+
+    float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell;
+    float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj;
+
+    // Map the activation function (passed as an Android NN enum value) onto the equivalent
+    // Compute Library ActivationLayerInfo; five values are supported.
+    arm_compute::ActivationLayerInfo activationLayerInfo;
+    if (m_Data.m_Parameters.m_ActivationFunc == 0)
+    {
+        // no activation, do nothing
+    }
+    else if (m_Data.m_Parameters.m_ActivationFunc == 1)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+    }
+    else if (m_Data.m_Parameters.m_ActivationFunc == 3)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
+    }
+    else if (m_Data.m_Parameters.m_ActivationFunc == 4)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
+    }
+    else if (m_Data.m_Parameters.m_ActivationFunc == 6)
+    {
+        activationLayerInfo = arm_compute::ActivationLayerInfo(
+            arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
+    }
+    else
+    {
+        throw armnn::Exception("Wrong Type of Activation Function!");
+    }
+
+    m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(),
+                          m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
+                          m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
+                          m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(),
+                          &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
&cell_state_out, &output, lstm_param, activationLayerInfo, + cell_threshold, projection_threshold); + + armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer); + + InitialiseArmComputeClTensorData(*m_InputToForgetWeightsTensor, + m_Data.m_InputToForgetWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_InputToCellWeightsTensor, + m_Data.m_InputToCellWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_InputToOutputWeightsTensor, + m_Data.m_InputToOutputWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, + m_Data.m_RecurrentToForgetWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, + m_Data.m_RecurrentToCellWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, + m_Data.m_RecurrentToOutputWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_ForgetGateBiasTensor, + m_Data.m_ForgetGateBias->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_CellBiasTensor, + m_Data.m_CellBias->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_OutputGateBiasTensor, + m_Data.m_OutputGateBias->GetConstTensor()); + + if (!m_Data.m_Parameters.m_CifgEnabled) + { + InitialiseArmComputeClTensorData(*m_InputToInputWeightsTensor, + m_Data.m_InputToInputWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, + m_Data.m_RecurrentToInputWeights->GetConstTensor()); + if (m_Data.m_CellToInputWeights != nullptr) + { + InitialiseArmComputeClTensorData(*m_CellToInputWeightsTensor, + m_Data.m_CellToInputWeights->GetConstTensor()); + } + InitialiseArmComputeClTensorData(*m_InputGateBiasTensor, + m_Data.m_InputGateBias->GetConstTensor()); + } + + if (m_Data.m_Parameters.m_ProjectionEnabled) + { + InitialiseArmComputeClTensorData(*m_ProjectionWeightsTensor, + m_Data.m_ProjectionWeights->GetConstTensor()); + if (m_Data.m_ProjectionBias != nullptr) + { + InitialiseArmComputeClTensorData(*m_ProjectionBiasTensor, + m_Data.m_ProjectionBias->GetConstTensor()); + } + } + + if (m_Data.m_Parameters.m_PeepholeEnabled) + { + InitialiseArmComputeClTensorData(*m_CellToForgetWeightsTensor, + m_Data.m_CellToForgetWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_CellToOutputWeightsTensor, + m_Data.m_CellToOutputWeights->GetConstTensor()); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_LstmLayer.prepare(); + FreeUnusedTensors(); +} + +void ClLstmFloatWorkload::Execute() const +{ + m_LstmLayer.run(); +} + +arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, + const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, + const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, + const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, + const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, + const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, + const TensorInfo* projectionWeights, + const TensorInfo* 
projectionBias, + const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights) +{ + arm_compute::LSTMParams lstm_params_info; + + // The inputs and the outputs + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn); + const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn); + const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer); + const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut); + const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + // Basic parameters + const arm_compute::TensorInfo aclInputToForgetWeightsInfo = BuildArmComputeTensorInfo(inputToForgetWeights); + const arm_compute::TensorInfo aclInputToCellWeightsInfo = BuildArmComputeTensorInfo(inputToCellWeights); + const arm_compute::TensorInfo aclInputToOutputWeightsInfo = BuildArmComputeTensorInfo(inputToOutputWeights); + const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo + = BuildArmComputeTensorInfo(recurrentToForgetWeights); + const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo + = BuildArmComputeTensorInfo(recurrentToCellWeights); + const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo + = BuildArmComputeTensorInfo(recurrentToOutputWeights); + const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(forgetGateBias); + const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(cellBias); + const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(outputGateBias); + + arm_compute::TensorInfo aclInputToInputWeightsInfo; + arm_compute::TensorInfo aclRecurrentToInputWeightsInfo; + arm_compute::TensorInfo aclCellToInputWeightsInfo; + arm_compute::TensorInfo aclInputGateBiasInfo; + arm_compute::TensorInfo aclProjectionWeightsInfo; + arm_compute::TensorInfo aclProjectionBiasInfo; + arm_compute::TensorInfo aclCellToForgetWeightsInfo; + arm_compute::TensorInfo aclCellToOutputWeightsInfo; + + if (!descriptor.m_CifgEnabled) + { + armnn::TensorInfo inputToInputWInfo = *inputToInputWeights; + aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(inputToInputWInfo); + armnn::TensorInfo recurrentToInputWInfo = *recurrentToInputWeights; + aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(recurrentToInputWInfo); + + if (cellToInputWeights != nullptr) + { + armnn::TensorInfo cellToInputWInfo = *cellToInputWeights; + aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(cellToInputWInfo); + } + armnn::TensorInfo inputGateBiasInfo = *inputGateBias; + aclInputGateBiasInfo = BuildArmComputeTensorInfo(inputGateBiasInfo); + lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo, + cellToInputWeights != nullptr ? &aclCellToInputWeightsInfo: nullptr, + &aclInputGateBiasInfo); + } + + if (descriptor.m_ProjectionEnabled) + { + const armnn::TensorInfo& projectionWInfo = *projectionWeights; + aclProjectionWeightsInfo = BuildArmComputeTensorInfo(projectionWInfo); + + if (projectionBias != nullptr) + { + const armnn::TensorInfo& projectionBiasInfo = *projectionBias; + aclProjectionBiasInfo = BuildArmComputeTensorInfo(projectionBiasInfo); + } + lstm_params_info.set_projection_params(&aclProjectionWeightsInfo, + projectionBias != nullptr ? 
&aclProjectionBiasInfo: nullptr); + } + + if (descriptor.m_PeepholeEnabled) + { + const armnn::TensorInfo& cellToForgetWInfo = *cellToForgetWeights; + aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(cellToForgetWInfo); + const armnn::TensorInfo& cellToOutputWInfo = *cellToOutputWeights; + aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(cellToOutputWInfo); + lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo); + } + + float cell_threshold = descriptor.m_ClippingThresCell; + float projection_threshold = descriptor.m_ClippingThresProj; + + // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations + arm_compute::ActivationLayerInfo activationLayerInfo; + if (descriptor.m_ActivationFunc == 0) + { + // no activation, do nothing + } + else if (descriptor.m_ActivationFunc == 1) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::RELU); + } + else if (descriptor.m_ActivationFunc == 3) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); + } + else if (descriptor.m_ActivationFunc == 4) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); + } + else if (descriptor.m_ActivationFunc == 6) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); + } + else + { + throw armnn::Exception("Wrong Type of Activation Function!"); + } + + return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo, + &aclInputToCellWeightsInfo, + &aclInputToOutputWeightsInfo, + &aclRecurrentToForgetWeightsInfo, + &aclRecurrentToCellWeightsInfo, + &aclRecurrentToOutputWeightsInfo, + &aclForgetGateBiasInfo, + &aclCellBiasInfo, + &aclOutputGateBiasInfo, + &aclOutputStateInInfo, &aclCellStateInInfo, + &aclScratchBufferInfo, &aclOutputStateOutInfo, + &aclCellStateOutInfo, &aclOutputInfo, + lstm_params_info, activationLayerInfo, + cell_threshold, projection_threshold); +} + +void ClLstmFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_InputToInputWeightsTensor); + FreeTensorIfUnused(m_InputToForgetWeightsTensor); + FreeTensorIfUnused(m_InputToCellWeightsTensor); + FreeTensorIfUnused(m_InputToOutputWeightsTensor); + FreeTensorIfUnused(m_RecurrentToInputWeightsTensor); + FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor); + FreeTensorIfUnused(m_RecurrentToCellWeightsTensor); + FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor); + FreeTensorIfUnused(m_CellToInputWeightsTensor); + FreeTensorIfUnused(m_CellToForgetWeightsTensor); + FreeTensorIfUnused(m_CellToOutputWeightsTensor); + FreeTensorIfUnused(m_InputGateBiasTensor); + FreeTensorIfUnused(m_ForgetGateBiasTensor); + FreeTensorIfUnused(m_CellBiasTensor); + FreeTensorIfUnused(m_OutputGateBiasTensor); + FreeTensorIfUnused(m_ProjectionWeightsTensor); + FreeTensorIfUnused(m_ProjectionBiasTensor); + FreeTensorIfUnused(m_ScratchBuffer); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClLstmFloatWorkload.hpp b/src/backends/ClWorkloads/ClLstmFloatWorkload.hpp new file mode 100644 index 0000000000..61d8fc3e6c --- /dev/null +++ b/src/backends/ClWorkloads/ClLstmFloatWorkload.hpp @@ -0,0 +1,68 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
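+// ClLstmFloatWorkload: runs a float LSTM cell via arm_compute::CLLSTMLayer, keeping CLTensor copies
+// of the weights and biases that are freed again once the layer has been prepared.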
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +#include + +namespace armnn +{ + +class ClLstmFloatWorkload : public FloatWorkload +{ +public: + ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLLSTMLayer m_LstmLayer; + + std::unique_ptr m_InputToInputWeightsTensor; + std::unique_ptr m_InputToForgetWeightsTensor; + std::unique_ptr m_InputToCellWeightsTensor; + std::unique_ptr m_InputToOutputWeightsTensor; + std::unique_ptr m_RecurrentToInputWeightsTensor; + std::unique_ptr m_RecurrentToForgetWeightsTensor; + std::unique_ptr m_RecurrentToCellWeightsTensor; + std::unique_ptr m_RecurrentToOutputWeightsTensor; + std::unique_ptr m_CellToInputWeightsTensor; + std::unique_ptr m_CellToForgetWeightsTensor; + std::unique_ptr m_CellToOutputWeightsTensor; + std::unique_ptr m_InputGateBiasTensor; + std::unique_ptr m_ForgetGateBiasTensor; + std::unique_ptr m_CellBiasTensor; + std::unique_ptr m_OutputGateBiasTensor; + std::unique_ptr m_ProjectionWeightsTensor; + std::unique_ptr m_ProjectionBiasTensor; + + std::unique_ptr m_ScratchBuffer; + + void FreeUnusedTensors(); +}; + +arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor &descriptor, + const TensorInfo& inputToForgetWeights, + const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, + const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, + const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, + const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, + const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, + const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, + const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights); +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClMergerFloatWorkload.cpp b/src/backends/ClWorkloads/ClMergerFloatWorkload.cpp new file mode 100644 index 0000000000..151f1e0ee7 --- /dev/null +++ b/src/backends/ClWorkloads/ClMergerFloatWorkload.cpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMergerFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClMergerFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerFloatWorkload_Execute"); + ClBaseMergerWorkload::Execute(); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClMergerFloatWorkload.hpp b/src/backends/ClWorkloads/ClMergerFloatWorkload.hpp new file mode 100644 index 0000000000..9782f7a8f3 --- /dev/null +++ b/src/backends/ClWorkloads/ClMergerFloatWorkload.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerFloatWorkload : public ClBaseMergerWorkload +{ +public: + using ClBaseMergerWorkload::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClMergerUint8Workload.cpp b/src/backends/ClWorkloads/ClMergerUint8Workload.cpp new file mode 100644 index 0000000000..9d1060d857 --- /dev/null +++ b/src/backends/ClWorkloads/ClMergerUint8Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMergerUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerUint8Workload_Execute"); + ClBaseMergerWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClMergerUint8Workload.hpp b/src/backends/ClWorkloads/ClMergerUint8Workload.hpp new file mode 100644 index 0000000000..cbfc19a0f2 --- /dev/null +++ b/src/backends/ClWorkloads/ClMergerUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerUint8Workload : public ClBaseMergerWorkload +{ +public: + using ClBaseMergerWorkload::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp new file mode 100644 index 0000000000..c3330a98e8 --- /dev/null +++ b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMultiplicationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. 
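+    // A scale of 1.0f leaves the element-wise product unscaled, and SATURATE clamps any overflow.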
+ return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1, + &aclInput2, + &aclOutput, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + + +ClMultiplicationFloatWorkload::ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClMultiplicationFloatWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + // Construct + m_PixelWiseMultiplication.configure(&input0, + &input1, + &output, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_NEAREST_EVEN); +} + +void ClMultiplicationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationFloatWorkload_Execute"); + + // Executes the layer. + m_PixelWiseMultiplication.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp new file mode 100644 index 0000000000..c2d6b7697a --- /dev/null +++ b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class ClMultiplicationFloatWorkload : public FloatWorkload +{ +public: + ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication; +}; + +} //namespace armnn + + + diff --git a/src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..d2625354ef --- /dev/null +++ b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp @@ -0,0 +1,50 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClNormalizationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, const TensorInfo& output, + const NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo layerInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor); + + return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::NormalizationLayerInfo normalizationInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters); + + m_NormalizationLayer.configure(&input, &output, normalizationInfo); +}; + +void ClNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute"); + m_NormalizationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..f02d0adb70 --- /dev/null +++ b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor); + +class ClNormalizationFloatWorkload : public FloatWorkload +{ +public: + ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLNormalizationLayer m_NormalizationLayer; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClPermuteWorkload.cpp b/src/backends/ClWorkloads/ClPermuteWorkload.cpp new file mode 100644 index 0000000000..29d98bf0eb --- /dev/null +++ b/src/backends/ClWorkloads/ClPermuteWorkload.cpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClPermuteWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor) +{ + const armnn::PermutationVector& perm = descriptor.m_DimMappings; + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!perm.IsEqual({ 0U, 3U, 1U, 2U }) + && !perm.IsEqual({ 0U, 2U, 3U, 1U }) + && !perm.IsEqual({ 3U, 2U, 0U, 1U }), + "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported"); + + return arm_compute::Status{}; +} + +template +ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + using armcomputetensorutils::BuildArmComputePermutationVector; + + m_Data.ValidateInputsOutputs(GetName(), 1, 1); + + const arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + // Run the layer. + m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); +} + +template +void ClPermuteWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute"); + m_PermuteFunction.run(); +} + +template class ClPermuteWorkload; +template class ClPermuteWorkload; + +} // namespace armnn diff --git a/src/backends/ClWorkloads/ClPermuteWorkload.hpp b/src/backends/ClWorkloads/ClPermuteWorkload.hpp new file mode 100644 index 0000000000..a1f3161921 --- /dev/null +++ b/src/backends/ClWorkloads/ClPermuteWorkload.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +#include +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor); + +template +class ClPermuteWorkload : public TypedWorkload +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("ClPermuteWorkload"); + return name; + } + + ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TypedWorkload::m_Data; + mutable arm_compute::CLPermute m_PermuteFunction; +}; + +using ClPermuteFloatWorkload = ClPermuteWorkload; +using ClPermuteUint8Workload = ClPermuteWorkload; + +} // namespace armnn diff --git a/src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp new file mode 100644 index 0000000000..a1ee50b39f --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
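+// ClPooling2dBaseWorkload: configure and validate logic shared by the float and uint8 CL pooling workloads.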
+// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dBaseWorkload.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); + + return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +template +ClPooling2dBaseWorkload::ClPooling2dBaseWorkload( + const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) + : TypedWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); + + // Run the layer. + m_PoolingLayer.configure(&input, &output, layerInfo); +} + +template class ClPooling2dBaseWorkload; +template class ClPooling2dBaseWorkload; + +} diff --git a/src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp new file mode 100644 index 0000000000..ea7ddfb41b --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor); + +// Base class template providing an implementation of the Pooling2d layer common to all data types. +template +class ClPooling2dBaseWorkload : public TypedWorkload +{ +public: + using TypedWorkload::m_Data; + + ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, + const std::string& name); + +protected: + mutable arm_compute::CLPoolingLayer m_PoolingLayer; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp new file mode 100644 index 0000000000..dc9d17f0ae --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClPooling2dFloatWorkload::ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload(descriptor, info, "ClPooling2dFloatWorkload") +{ +} + +void ClPooling2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dFloatWorkload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp new file mode 100644 index 0000000000..71648d40f4 --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include "ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ +class ClPooling2dFloatWorkload : public ClPooling2dBaseWorkload +{ +public: + ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp b/src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..0b4b15f806 --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload(descriptor, info, "ClPooling2dUint8Workload") +{ +} + +void ClPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dUint8Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp b/src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..2baf2aa708 --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include "ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload +{ +public: + ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp b/src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp new file mode 100644 index 0000000000..ea50436a66 --- /dev/null +++ b/src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClReshapeFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClReshapeFloatWorkload::ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void ClReshapeFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp b/src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp new file mode 100644 index 0000000000..48265143e5 --- /dev/null +++ b/src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClReshapeFloatWorkload : public FloatWorkload +{ +public: + ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClReshapeUint8Workload.cpp b/src/backends/ClWorkloads/ClReshapeUint8Workload.cpp new file mode 100644 index 0000000000..82bd93ef9c --- /dev/null +++ b/src/backends/ClWorkloads/ClReshapeUint8Workload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClReshapeUint8Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output); +} + +void ClReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeUint8Workload_Execute"); + + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClReshapeUint8Workload.hpp b/src/backends/ClWorkloads/ClReshapeUint8Workload.hpp new file mode 100644 index 0000000000..c9801a3ae1 --- /dev/null +++ b/src/backends/ClWorkloads/ClReshapeUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +// Reshape +class ClReshapeUint8Workload : public Uint8Workload +{ +public: + ClReshapeUint8Workload( const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp new file mode 100644 index 0000000000..8348afb76a --- /dev/null +++ b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClResizeBilinearFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ArmComputeUtils.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClResizeBilinearFloatWorkload::ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClResizeBilinearFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR, + arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f), + arm_compute::SamplingPolicy::TOP_LEFT); +}; + +void ClResizeBilinearFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeBilinearFloatWorkload_Execute"); + m_ResizeBilinearLayer.run(); +} + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp new file mode 100644 index 0000000000..f2ee67f5dd --- /dev/null +++ b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClResizeBilinearFloatWorkload : public FloatWorkload +{ +public: + ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLScale m_ResizeBilinearLayer; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp new file mode 100644 index 0000000000..b4ea236d49 --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
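+// ClSoftmaxBaseWorkload: validation shared by the float and uint8 CL softmax workloads;
+// inputs with four or more dimensions are reported as unsupported.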
+// SPDX-License-Identifier: MIT +// + +#include "ClSoftmaxBaseWorkload.hpp" + +#include "backends/ArmComputeTensorUtils.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output) +{ + // NOTE: We report 4D Softmax as unsupported until full support is added to ACL + if(input.GetShape().GetNumDimensions() >= 4u) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); + } + + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo); +} + +} diff --git a/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp new file mode 100644 index 0000000000..b800056cdf --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output); + +} // namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp new file mode 100644 index 0000000000..c34b5a2a74 --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSoftmaxFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClSoftmaxFloatWorkload::ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("ClSoftmaxFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); +} + +void ClSoftmaxFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxFloatWorkload_Execute"); + m_SoftmaxLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp new file mode 100644 index 0000000000..965b845cf8 --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ + +class ClSoftmaxFloatWorkload : public FloatWorkload +{ +public: + ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..1bb9628d74 --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSoftmaxUint8Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : Uint8Workload(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + const auto outputQuantization = output.info()->quantization_info(); + + if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) + { + throw InvalidArgumentException( + "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported"); + } + + m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); +} + +void ClSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxUint8Workload_Execute"); + + m_SoftmaxLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..29427a5976 --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ +// Softmax +class ClSoftmaxUint8Workload : public Uint8Workload +{ +public: + ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + + void Execute() const override; +private: + + mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp b/src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp new file mode 100644 index 0000000000..5fd634bdb6 --- /dev/null +++ b/src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClSplitterFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClSplitterFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterFloatWorkload_Execute"); + ClBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp b/src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp new file mode 100644 index 0000000000..a0b5846f8e --- /dev/null +++ b/src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class ClSplitterFloatWorkload : public ClBaseSplitterWorkload +{ +public: + using ClBaseSplitterWorkload::ClBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSplitterUint8Workload.cpp b/src/backends/ClWorkloads/ClSplitterUint8Workload.cpp new file mode 100644 index 0000000000..50a251ada7 --- /dev/null +++ b/src/backends/ClWorkloads/ClSplitterUint8Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSplitterUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterUint8Workload_Execute"); + ClBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSplitterUint8Workload.hpp b/src/backends/ClWorkloads/ClSplitterUint8Workload.hpp new file mode 100644 index 0000000000..19e8be5034 --- /dev/null +++ b/src/backends/ClWorkloads/ClSplitterUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseSplitterWorkload.hpp" + +namespace armnn +{ +class ClSplitterUint8Workload : public ClBaseSplitterWorkload +{ +public: + using ClBaseSplitterWorkload::ClBaseSplitterWorkload; + virtual void Execute() const override; +}; +} //namespace armnn + + + diff --git a/src/backends/ClWorkloads/ClSubtractionWorkload.cpp b/src/backends/ClWorkloads/ClSubtractionWorkload.cpp new file mode 100644 index 0000000000..1c70130fa4 --- /dev/null +++ b/src/backends/ClWorkloads/ClSubtractionWorkload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
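+// ClSubtractionWorkload: element-wise subtraction via arm_compute::CLArithmeticSubtraction,
+// always using a saturating convert policy.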
+// SPDX-License-Identifier: MIT +// + +#include "ClSubtractionWorkload.hpp" + +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +template +ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(this->m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); +} + +template +void ClSubtractionWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute"); + m_Layer.run(); +} + +bool ClSubtractionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info, + &aclInput1Info, + &aclOutputInfo, + g_AclConvertPolicy); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return supported; +} + +} //namespace armnn + +template class armnn::ClSubtractionWorkload; +template class armnn::ClSubtractionWorkload; diff --git a/src/backends/ClWorkloads/ClSubtractionWorkload.hpp b/src/backends/ClWorkloads/ClSubtractionWorkload.hpp new file mode 100644 index 0000000000..59a5f01e73 --- /dev/null +++ b/src/backends/ClWorkloads/ClSubtractionWorkload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +template +class ClSubtractionWorkload : public TypedWorkload +{ +public: + ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLArithmeticSubtraction m_Layer; +}; + +bool ClSubtractionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported); +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClWorkloadUtils.hpp b/src/backends/ClWorkloads/ClWorkloadUtils.hpp new file mode 100644 index 0000000000..6f1b155745 --- /dev/null +++ b/src/backends/ClWorkloads/ClWorkloadUtils.hpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
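+// ClWorkloadUtils: helpers shared by the CL workloads - the ARMNN_SCOPED_PROFILING_EVENT_CL macro
+// and routines that map a CLTensor and copy host-side data into it.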
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include "OpenClTimer.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include + +#define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \ + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \ + name, \ + armnn::OpenClTimer(), \ + armnn::WallClockTimer()) + +namespace armnn +{ + +template +void CopyArmComputeClTensorData(const T* srcData, arm_compute::CLTensor& dstTensor) +{ + { + ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting"); + dstTensor.map(true); + } + + { + ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor"); + armcomputetensorutils::CopyArmComputeITensorData(srcData, dstTensor); + } + + dstTensor.unmap(); +} + +template +void InitialiseArmComputeClTensorData(arm_compute::CLTensor& clTensor, const T* data) +{ + armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor); + CopyArmComputeClTensorData(data, clTensor); +} + +inline void InitializeArmComputeClTensorDataForFloatTypes(arm_compute::CLTensor& clTensor, + const ConstCpuTensorHandle *handle) +{ + BOOST_ASSERT(handle); + switch(handle->GetTensorInfo().GetDataType()) + { + case DataType::Float16: + InitialiseArmComputeClTensorData(clTensor, handle->GetConstTensor()); + break; + case DataType::Float32: + InitialiseArmComputeClTensorData(clTensor, handle->GetConstTensor()); + break; + default: + BOOST_ASSERT_MSG(false, "Unexpected floating point type."); + } +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/backend.cmake b/src/backends/ClWorkloads/backend.cmake new file mode 100644 index 0000000000..6f8eda1628 --- /dev/null +++ b/src/backends/ClWorkloads/backend.cmake @@ -0,0 +1,9 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +if(ARMCOMPUTECL) + add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/ClWorkloads) + list(APPEND armnnLibraries armnnClBackend) +endif() diff --git a/src/backends/CpuTensorHandle.cpp b/src/backends/CpuTensorHandle.cpp new file mode 100644 index 0000000000..1a264531e5 --- /dev/null +++ b/src/backends/CpuTensorHandle.cpp @@ -0,0 +1,113 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include "armnn/Exceptions.hpp" +#include "CpuTensorHandle.hpp" + +#include + +namespace armnn +{ + +ConstCpuTensorHandle::ConstCpuTensorHandle(const TensorInfo& tensorInfo) +: m_TensorInfo(tensorInfo) +, m_Memory(nullptr) +{ +} + +template <> +const void* ConstCpuTensorHandle::GetConstTensor() const +{ + return m_Memory; +} + +CpuTensorHandle::CpuTensorHandle(const TensorInfo& tensorInfo) +: ConstCpuTensorHandle(tensorInfo) +, m_MutableMemory(nullptr) +{ +} + +template <> +void* CpuTensorHandle::GetTensor() const +{ + return m_MutableMemory; +} + +ScopedCpuTensorHandle::ScopedCpuTensorHandle(const TensorInfo& tensorInfo) +: CpuTensorHandle(tensorInfo) +{ +} + +ScopedCpuTensorHandle::ScopedCpuTensorHandle(const ConstTensor& tensor) +: ScopedCpuTensorHandle(tensor.GetInfo()) +{ + CopyFrom(tensor.GetMemoryArea(), tensor.GetNumBytes()); +} + +ScopedCpuTensorHandle::ScopedCpuTensorHandle(const ConstCpuTensorHandle& tensorHandle) +: ScopedCpuTensorHandle(tensorHandle.GetTensorInfo()) +{ + CopyFrom(tensorHandle.GetConstTensor(), tensorHandle.GetTensorInfo().GetNumBytes()); +} + +ScopedCpuTensorHandle::ScopedCpuTensorHandle(const ScopedCpuTensorHandle& other) +: CpuTensorHandle(other.GetTensorInfo()) +{ + CopyFrom(other); +} + +ScopedCpuTensorHandle& ScopedCpuTensorHandle::operator=(const ScopedCpuTensorHandle& other) +{ + ::operator delete(GetTensor()); + SetMemory(nullptr); + CopyFrom(other); + return *this; +} + +ScopedCpuTensorHandle::~ScopedCpuTensorHandle() +{ + ::operator delete(GetTensor()); +} + +void ScopedCpuTensorHandle::Allocate() +{ + if (GetTensor() == nullptr) + { + SetMemory(::operator new(GetTensorInfo().GetNumBytes())); + } + else + { + throw InvalidArgumentException("CpuTensorHandle::Allocate Trying to allocate a CpuTensorHandle" + "that already has allocated memory."); + } +} + +void ScopedCpuTensorHandle::CopyFrom(const ScopedCpuTensorHandle& other) +{ + CopyFrom(other.GetTensor(), other.GetTensorInfo().GetNumBytes()); +} + +void ScopedCpuTensorHandle::CopyFrom(const void* srcMemory, unsigned int numBytes) +{ + BOOST_ASSERT(GetTensor() == nullptr); + BOOST_ASSERT(GetTensorInfo().GetNumBytes() == numBytes); + + if (srcMemory) + { + Allocate(); + memcpy(GetTensor(), srcMemory, numBytes); + } +} + +void PassthroughCpuTensorHandle::Allocate() +{ + throw InvalidArgumentException("PassthroughCpuTensorHandle::Allocate() should never be called"); +} + +void ConstPassthroughCpuTensorHandle::Allocate() +{ + throw InvalidArgumentException("ConstPassthroughCpuTensorHandle::Allocate() should never be called"); +} + +} // namespace armnn diff --git a/src/backends/CpuTensorHandle.hpp b/src/backends/CpuTensorHandle.hpp new file mode 100644 index 0000000000..541beefde6 --- /dev/null +++ b/src/backends/CpuTensorHandle.hpp @@ -0,0 +1,171 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once +#include "CpuTensorHandleFwd.hpp" + +#include "armnn/TypesUtils.hpp" + +#include "OutputHandler.hpp" + +#include + +namespace armnn +{ + +// Abstract tensor handles wrapping a CPU-readable region of memory, interpreting it as tensor data. 
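+// The handle does not own the memory it points at; ScopedCpuTensorHandle below provides an owning variant.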
+class ConstCpuTensorHandle : public ITensorHandle +{ +public: + template + const T* GetConstTensor() const + { + BOOST_ASSERT(GetTensorInfo().GetDataType() == GetDataType()); + return reinterpret_cast(m_Memory); + } + + const TensorInfo& GetTensorInfo() const + { + return m_TensorInfo; + } + + virtual ITensorHandle::Type GetType() const override + { + return ITensorHandle::Cpu; + } + + virtual void Manage() override {} + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual const void* Map(bool /* blocking = true */) const override { return m_Memory; } + virtual void Unmap() const override {} + + TensorShape GetStrides() const override + { + TensorShape shape(m_TensorInfo.GetShape()); + auto size = GetDataTypeSize(m_TensorInfo.GetDataType()); + auto runningSize = size; + std::vector strides(shape.GetNumDimensions()); + auto lastIdx = shape.GetNumDimensions()-1; + for (unsigned int i=0; i < lastIdx ; i++) + { + strides[lastIdx-i] = runningSize; + runningSize *= shape[lastIdx-i]; + } + strides[0] = runningSize; + return TensorShape(shape.GetNumDimensions(), strides.data()); + } + TensorShape GetShape() const override { return m_TensorInfo.GetShape(); } + +protected: + ConstCpuTensorHandle(const TensorInfo& tensorInfo); + + void SetConstMemory(const void* mem) { m_Memory = mem; } + +private: + ConstCpuTensorHandle(const ConstCpuTensorHandle& other) = delete; + ConstCpuTensorHandle& operator=(const ConstCpuTensorHandle& other) = delete; + + TensorInfo m_TensorInfo; + const void* m_Memory; +}; + +// Abstract specialization of ConstCpuTensorHandle that allows write access to the same data. +class CpuTensorHandle : public ConstCpuTensorHandle +{ +public: + template + T* GetTensor() const + { + BOOST_ASSERT(GetTensorInfo().GetDataType() == GetDataType()); + return reinterpret_cast(m_MutableMemory); + } + +protected: + CpuTensorHandle(const TensorInfo& tensorInfo); + + void SetMemory(void* mem) + { + m_MutableMemory = mem; + SetConstMemory(m_MutableMemory); + } + +private: + + CpuTensorHandle(const CpuTensorHandle& other) = delete; + CpuTensorHandle& operator=(const CpuTensorHandle& other) = delete; + void* m_MutableMemory; +}; + +// A CpuTensorHandle that owns the wrapped memory region. +class ScopedCpuTensorHandle : public CpuTensorHandle +{ +public: + explicit ScopedCpuTensorHandle(const TensorInfo& tensorInfo); + + // Copies contents from Tensor. + explicit ScopedCpuTensorHandle(const ConstTensor& tensor); + + // Copies contents from ConstCpuTensorHandle + explicit ScopedCpuTensorHandle(const ConstCpuTensorHandle& tensorHandle); + + ScopedCpuTensorHandle(const ScopedCpuTensorHandle& other); + ScopedCpuTensorHandle& operator=(const ScopedCpuTensorHandle& other); + ~ScopedCpuTensorHandle(); + + virtual void Allocate() override; + +private: + void CopyFrom(const ScopedCpuTensorHandle& other); + void CopyFrom(const void* srcMemory, unsigned int numBytes); +}; + +// A CpuTensorHandle that wraps an already allocated memory region. +// +// Clients must make sure the passed in memory region stays alive for the lifetime of +// the PassthroughCpuTensorHandle instance. +// +// Note there is no polymorphism to/from ConstPassthroughCpuTensorHandle. +class PassthroughCpuTensorHandle : public CpuTensorHandle +{ +public: + PassthroughCpuTensorHandle(const TensorInfo& tensorInfo, void* mem) + : CpuTensorHandle(tensorInfo) + { + SetMemory(mem); + } + + virtual void Allocate() override; +}; + +// A ConstCpuTensorHandle that wraps an already allocated memory region. 
+// +// This allows users to pass in const memory to a network. +// Clients must make sure the passed in memory region stays alive for the lifetime of +// the PassthroughCpuTensorHandle instance. +// +// Note there is no polymorphism to/from PassthroughCpuTensorHandle. +class ConstPassthroughCpuTensorHandle : public ConstCpuTensorHandle +{ +public: + ConstPassthroughCpuTensorHandle(const TensorInfo& tensorInfo, const void* mem) + : ConstCpuTensorHandle(tensorInfo) + { + SetConstMemory(mem); + } + + virtual void Allocate() override; +}; + + +// Template specializations. + +template <> +const void* ConstCpuTensorHandle::GetConstTensor() const; + +template <> +void* CpuTensorHandle::GetTensor() const; + +} // namespace armnn diff --git a/src/backends/CpuTensorHandleFwd.hpp b/src/backends/CpuTensorHandleFwd.hpp new file mode 100644 index 0000000000..d439d0bbe6 --- /dev/null +++ b/src/backends/CpuTensorHandleFwd.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +namespace armnn +{ + +class ConstCpuTensorHandle; +class CpuTensorHandle; +class ScopedCpuTensorHandle; +class PassthroughCpuTensorHandle; +class ConstPassthroughCpuTensorHandle; + +} // namespace armnn diff --git a/src/backends/ITensorHandle.hpp b/src/backends/ITensorHandle.hpp new file mode 100644 index 0000000000..02f4ed6e5a --- /dev/null +++ b/src/backends/ITensorHandle.hpp @@ -0,0 +1,73 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +namespace armnn +{ + +class TensorShape; + +class ITensorHandle +{ +public: + enum Type + { + Cpu, + CL, + Neon + }; + + virtual ~ITensorHandle(){} + + /// Indicate to the memory manager that this resource is active. + /// This is used to compute overlapping lifetimes of resources. + virtual void Manage() = 0; + + /// Indicate to the memory manager that this resource is no longer active. + /// This is used to compute overlapping lifetimes of resources. + virtual void Allocate() = 0; + + /// Get the type backend associated with the tensor handle. + /// \return Type enum + virtual ITensorHandle::Type GetType() const = 0; + + /// Get the parent tensor if this is a subtensor. + /// \return a pointer to the parent tensor. Otherwise nullptr if not a subtensor. + virtual ITensorHandle* GetParent() const = 0; + + /// Map the tensor data for access. + /// \param blocking hint to block the calling thread until all other accesses are complete. (backend dependent) + /// \return pointer to the first element of the mapped data. + virtual const void* Map(bool blocking=true) const = 0; + + /// Unmap the tensor data + virtual void Unmap() const = 0; + + /// Map the tensor data for access. Must be paired with call to Unmap(). + /// \param blocking hint to block the calling thread until all other accesses are complete. (backend dependent) + /// \return pointer to the first element of the mapped data. + void* Map(bool blocking=true) + { + return const_cast(static_cast(this)->Map(blocking)); + } + + /// Unmap the tensor data that was previously mapped with call to Map(). + void Unmap() + { + return static_cast(this)->Unmap(); + } + + /// Get the strides for each dimension ordered from largest to smallest where + /// the smallest value is the same as the size of a single element in the tensor. 
+ /// \return a TensorShape filled with the strides for each dimension + virtual TensorShape GetStrides() const = 0; + + /// Get the number of elements for each dimension orderd from slowest iterating dimension + /// to fastest iterating dimension. + /// \return a TensorShape filled with the number of elements for each dimension. + virtual TensorShape GetShape() const = 0; +}; + +} diff --git a/src/backends/MakeWorkloadHelper.hpp b/src/backends/MakeWorkloadHelper.hpp new file mode 100644 index 0000000000..281a65a21e --- /dev/null +++ b/src/backends/MakeWorkloadHelper.hpp @@ -0,0 +1,78 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +namespace armnn +{ +namespace +{ + +// Make a workload of the specified WorkloadType. +template +struct MakeWorkloadForType +{ + template + static std::unique_ptr Func(const QueueDescriptorType& descriptor, + const WorkloadInfo& info, + Args&&... args) + { + return std::make_unique(descriptor, info, std::forward(args)...); + } +}; + +// Specialization for void workload type used for unsupported workloads. +template<> +struct MakeWorkloadForType +{ + template + static std::unique_ptr Func(const QueueDescriptorType& descriptor, + const WorkloadInfo& info, + Args&&... args) + { + return nullptr; + } +}; + +// Makes a workload for one the specified types based on the data type requirements of the tensorinfo. +// Specify type void as the WorkloadType for unsupported DataType/WorkloadType combos. +template +std::unique_ptr MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info, Args&&... args) +{ + const DataType dataType = !info.m_InputTensorInfos.empty() ? + info.m_InputTensorInfos[0].GetDataType() + : info.m_OutputTensorInfos[0].GetDataType(); + + BOOST_ASSERT(info.m_InputTensorInfos.empty() || info.m_OutputTensorInfos.empty() + || info.m_InputTensorInfos[0].GetDataType() == info.m_OutputTensorInfos[0].GetDataType()); + + switch (dataType) + { + case DataType::Float16: + return MakeWorkloadForType::Func(descriptor, info, std::forward(args)...); + case DataType::Float32: + return MakeWorkloadForType::Func(descriptor, info, std::forward(args)...); + case DataType::QuantisedAsymm8: + return MakeWorkloadForType::Func(descriptor, info, std::forward(args)...); + default: + BOOST_ASSERT_MSG(false, "Unknown DataType."); + return nullptr; + } +} + +// Makes a workload for one the specified types based on the data type requirements of the tensorinfo. +// Calling this method is the equivalent of calling the three typed MakeWorkload method with . +// Specify type void as the WorkloadType for unsupported DataType/WorkloadType combos. +template +std::unique_ptr MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info, Args&&... args) +{ + return MakeWorkload(descriptor, info, + std::forward(args)...); +} + + +} //namespace +} //namespace armnn diff --git a/src/backends/MemCopyWorkload.cpp b/src/backends/MemCopyWorkload.cpp new file mode 100644 index 0000000000..75271a09de --- /dev/null +++ b/src/backends/MemCopyWorkload.cpp @@ -0,0 +1,61 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include "MemCopyWorkload.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "TypeUtils.hpp" + +#include +#include + +namespace armnn +{ + +namespace +{ + +template +void GatherTensorHandlePairs(const MemCopyQueueDescriptor& descriptor, + std::vector>& tensorHandlePairs) +{ + const unsigned int numInputs = static_cast(descriptor.m_Inputs.size()); + tensorHandlePairs.reserve(numInputs); + + for (unsigned int i = 0; i < numInputs; ++i) + { + SrcTensorHandleType* const srcTensorHandle = boost::polymorphic_downcast( + descriptor.m_Inputs[i]); + DstTensorHandleType* const dstTensorHandle = boost::polymorphic_downcast( + descriptor.m_Outputs[i]); + + tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle); + } +} + +} //namespace + + +CopyMemGenericWorkload::CopyMemGenericWorkload(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) + : BaseWorkload(descriptor, info) +{ + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void CopyMemGenericWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyMemGeneric_Execute"); + + auto copyFunc = [](void* dst, const void* src, size_t size) + { + memcpy(dst, src, size); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, copyFunc); + } +} + +} //namespace armnn diff --git a/src/backends/MemCopyWorkload.hpp b/src/backends/MemCopyWorkload.hpp new file mode 100644 index 0000000000..5227f32c9f --- /dev/null +++ b/src/backends/MemCopyWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "CpuTensorHandleFwd.hpp" +#include "backends/Workload.hpp" +#include "WorkloadUtils.hpp" +#include + +namespace armnn +{ + +class CopyMemGenericWorkload : public BaseWorkload +{ +public: + CopyMemGenericWorkload(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TensorHandlePair = std::pair; + std::vector m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/NeonLayerSupport.cpp b/src/backends/NeonLayerSupport.cpp new file mode 100644 index 0000000000..30956dfba0 --- /dev/null +++ b/src/backends/NeonLayerSupport.cpp @@ -0,0 +1,468 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonLayerSupport.hpp" + +#include "LayerSupportCommon.hpp" +#include "InternalTypes.hpp" + +#include +#include +#include + +#include + +#ifdef ARMCOMPUTENEON_ENABLED +#include "NeonWorkloads/NeonAdditionFloatWorkload.hpp" +#include "NeonWorkloads/NeonActivationFloatWorkload.hpp" +#include "NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp" +#include "NeonWorkloads/NeonConvolution2dBaseWorkload.hpp" +#include "NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp" +#include "NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp" +#include "NeonWorkloads/NeonMultiplicationFloatWorkload.hpp" +#include "NeonWorkloads/NeonNormalizationFloatWorkload.hpp" +#include "NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp" +#include "NeonWorkloads/NeonPermuteWorkload.hpp" +#include "NeonWorkloads/NeonPooling2dBaseWorkload.hpp" +#include "NeonWorkloads/NeonSoftmaxBaseWorkload.hpp" +#include "NeonWorkloads/NeonSubtractionFloatWorkload.hpp" +#endif + +using namespace boost; + +namespace armnn +{ + +bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) +{ + // See arm_compute::NEDirectConvolutionLayer documentation for the supported cases, + // and complement with NEDirectConvolutionLayerKernel::configure() implementation. + + // Only 1x1 is using direct convolution. Performance results and details are in: + // https://jira.arm.com/browse/IVGCVSW-1003 + // Measurements were taken as of clframework: f105ab972135bcd21304883eff040d7e587099bc + + const bool dataTypeSupported = (weightInfo.GetDataType() == armnn::DataType::Float32); + + // Strides: 1|2|3 + const bool strideSupported = (desc.m_StrideX == 1 || desc.m_StrideX == 2 || desc.m_StrideX == 3) && + (desc.m_StrideY == 1 || desc.m_StrideY == 2 || desc.m_StrideY == 3); + + auto paddingLargerThan = [](const Convolution2dDescriptor& conv2ddesc, unsigned int value) + { + return conv2ddesc.m_PadLeft > value || conv2ddesc.m_PadRight > value || + conv2ddesc.m_PadTop > value || conv2ddesc.m_PadBottom > value; + }; + + // Supported sizes and padding. + const bool sizeAndPaddingSupported = + // Pad > 0 not supported for 1x1 weights. + (weightInfo.GetShape()[2] == 1 && weightInfo.GetShape()[3] == 1 && !paddingLargerThan(desc, 0u)); + + const bool preferDirectConvolution = dataTypeSupported && + strideSupported && + sizeAndPaddingSupported && + // NEDirectConvolutionLayerKernel doesn't support NULL bias. + desc.m_BiasEnabled; + return preferDirectConvolution; +} + +bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, const NormalizationDescriptor& parameters) +{ + if (parameters.m_NormMethodType != NormalizationAlgorithmMethod::LocalBrightness) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Unsupported normalisation method type, only LocalBrightness is supported"; + } + return false; + } + if (parameters.m_NormSize % 2 == 0) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Normalization size must be an odd number."; + } + return false; + } + + return true; +} + +bool IsNeonBackendSupported(std::string* reasonIfUnsupported) +{ +#if ARMCOMPUTENEON_ENABLED + return true; +#else + if (reasonIfUnsupported != nullptr) + { + *reasonIfUnsupported = "The armnn library has been built without NEON support"; + } + return false; +#endif +} + +template +bool IsSupportedForDataTypeNeon(std::string* reasonIfUnsupported, + DataType dataType, + FloatFunc floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... 
params) +{ + return IsNeonBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + floatFuncPtr, + floatFuncPtr, + uint8FuncPtr, + std::forward(params)...); +} + +#if ARMCOMPUTENEON_ENABLED +template +inline bool IsWorkloadSupported(FuncType& func, std::string* reasonIfUnsupported, Args&&... args) +{ + arm_compute::Status aclStatus = func(std::forward(args)...); + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + return supported; +} + +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); +#else +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsNeonBackendSupported(reasonIfUnsupported); +#endif + +bool IsActivationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonActivationWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + +bool IsAdditionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsBatchNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonBatchNormalizationValidate, + reasonIfUnsupported, + input, + output, + mean, + var, + beta, + gamma, + descriptor); +} + +bool IsConstantSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsConvolution2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConvolution2dWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDivisionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + // At the moment division is not supported + return false; +} + +bool IsSubtractionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsFullyConnectedSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const 
TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + // At the moment U8 is unsupported + if (input.GetDataType() == DataType::QuantisedAsymm8) + { + return false; + } + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFullyConnectedWorkloadValidate, + reasonIfUnsupported, + input, + output, + weights, + biases, + descriptor); +} + +bool IsInputSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output); +} + +bool IsMergerSupportedNeon(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsMultiplicationSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsOutputSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPermuteSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPermuteWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsPooling2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsResizeBilinearSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + return false; +} + +bool IsSoftmaxSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsSplitterSupportedNeon(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedNeon(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(descriptor); + return false; +} + +bool IsReshapeSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + 
input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFloorSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsNeonBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &FalseFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsLstmSupportedNeon(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(outputStateIn); + ignore_unused(cellStateIn); + ignore_unused(scratchBuffer); + ignore_unused(outputStateOut); + ignore_unused(cellStateOut); + ignore_unused(output); + ignore_unused(descriptor); + ignore_unused(inputToForgetWeights); + ignore_unused(inputToCellWeights); + ignore_unused(inputToOutputWeights); + ignore_unused(recurrentToForgetWeights); + ignore_unused(recurrentToCellWeights); + ignore_unused(recurrentToOutputWeights); + ignore_unused(forgetGateBias); + ignore_unused(cellBias); + ignore_unused(outputGateBias); + ignore_unused(inputToInputWeights); + ignore_unused(recurrentToInputWeights); + ignore_unused(cellToInputWeights); + ignore_unused(inputGateBias); + ignore_unused(projectionWeights); + ignore_unused(projectionBias); + ignore_unused(cellToForgetWeights); + ignore_unused(cellToOutputWeights); + return false; +} + +bool IsConvertFp16ToFp32SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(output); + return true; +} + +bool IsConvertFp32ToFp16SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(output); + return true; +} + +bool IsMeanSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return false; +} + +} diff --git a/src/backends/NeonLayerSupport.hpp b/src/backends/NeonLayerSupport.hpp new file mode 100644 index 0000000000..95b14b3ba6 --- /dev/null +++ b/src/backends/NeonLayerSupport.hpp @@ -0,0 +1,163 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include + +#include + +namespace armnn +{ + +bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc); + +bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, + const NormalizationDescriptor& parameters); + +bool IsActivationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported); + +bool IsNeonDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, + const DepthwiseConvolution2dDescriptor& parameters, + const TensorInfo& weights); + +bool IsAdditionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported); + +bool IsBatchNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + + +bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDivisionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsSubtractionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsL2NormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedNeon(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPooling2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedNeon(const TensorInfo& 
input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedNeon(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedNeon(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsLstmSupportedNeon(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp16ToFp32SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp32ToFp16SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsMeanSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +} diff --git a/src/backends/NeonTensorHandle.hpp b/src/backends/NeonTensorHandle.hpp new file mode 100644 index 0000000000..e385c83967 --- /dev/null +++ b/src/backends/NeonTensorHandle.hpp @@ -0,0 +1,137 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include "OutputHandler.hpp" +#include "ArmComputeTensorUtils.hpp" + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnn +{ + +class INeonTensorHandle : public ITensorHandle +{ +public: + virtual arm_compute::ITensor& GetTensor() = 0; + virtual arm_compute::ITensor const& GetTensor() const = 0; + virtual arm_compute::DataType GetDataType() const = 0; + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) = 0; +}; + +class NeonTensorHandle : public INeonTensorHandle +{ +public: + NeonTensorHandle(const TensorInfo& tensorInfo) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); + } + + arm_compute::ITensor& GetTensor() override { return m_Tensor; } + arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override + { + armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor); + }; + + virtual void Manage() override + { + BOOST_ASSERT(m_MemoryGroup != nullptr); + m_MemoryGroup->manage(&m_Tensor); + } + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::Neon; } + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) override + { + m_MemoryGroup = boost::polymorphic_pointer_downcast(memoryGroup); + } + + virtual const void* Map(bool /* blocking = true */) const override + { + return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override {} + + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + +private: + arm_compute::Tensor m_Tensor; + std::shared_ptr m_MemoryGroup; +}; + +class NeonSubTensorHandle : public INeonTensorHandle +{ +public: + NeonSubTensorHandle(INeonTensorHandle* parent, + const arm_compute::TensorShape& shape, + const arm_compute::Coordinates& coords) + : m_Tensor(&parent->GetTensor(), shape, coords) + { + parentHandle = parent; + } + + arm_compute::ITensor& GetTensor() override { return m_Tensor; } + arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::Neon; } + + virtual ITensorHandle* GetParent() const override { return parentHandle; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr&) override {} + + virtual const void* Map(bool /* blocking = true */) const override + { + return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override {} + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } +private: + arm_compute::SubTensor m_Tensor; + ITensorHandle* parentHandle = nullptr; +}; + +} diff --git a/src/backends/NeonWorkloadFactory.cpp 
b/src/backends/NeonWorkloadFactory.cpp new file mode 100644 index 0000000000..80ce0b918e --- /dev/null +++ b/src/backends/NeonWorkloadFactory.cpp @@ -0,0 +1,479 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "NeonWorkloadFactory.hpp" +#include "armnn/Utils.hpp" +#include "CpuTensorHandle.hpp" +#include "Layer.hpp" + +#ifdef ARMCOMPUTENEON_ENABLED +#include "arm_compute/runtime/Allocator.h" + +#include "MemCopyWorkload.hpp" +#include "NeonTensorHandle.hpp" +#include "NeonWorkloadUtils.hpp" +#include "NeonWorkloads.hpp" + +#include "memory/IPoolManager.hpp" +#endif + +#include "MakeWorkloadHelper.hpp" + +#include + +namespace armnn +{ + +bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::CpuAcc, layer, dataType, outReasonIfUnsupported); +} + +#ifdef ARMCOMPUTENEON_ENABLED + +NeonWorkloadFactory::NeonWorkloadFactory() + : m_MemoryManager(std::make_unique(), BaseMemoryManager::MemoryAffinity::Offset) +{ +} + +std::unique_ptr NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + BOOST_ASSERT(parent.GetType() == ITensorHandle::Neon); + + const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); + + arm_compute::Coordinates coords; + coords.set_num_dimensions(subTensorShape.GetNumDimensions()); + for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) + { + // Arm compute indexes tensor coords in reverse order. + unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; + coords.set(i, boost::numeric_cast(subTensorOrigin[revertedIndex])); + } + + return std::make_unique( + boost::polymorphic_downcast(&parent), shape, coords); +} + +std::unique_ptr NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + auto tensorHandle = std::make_unique(tensorInfo); + tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); + + return tensorHandle; +} + +std::unique_ptr NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr 
NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateConvolution2d( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr NeonWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload( + descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateNormalization( + const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateSubtraction( + const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) + { + throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload"); + } + + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateResizeBilinear( + const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return 
MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +void NeonWorkloadFactory::Finalize() +{ + m_MemoryManager.Finalize(); +} + +void NeonWorkloadFactory::Release() +{ + m_MemoryManager.Release(); +} + +void NeonWorkloadFactory::Acquire() +{ + m_MemoryManager.Acquire(); +} + +#else // Compiled without ArmCompute libs + +NeonWorkloadFactory::NeonWorkloadFactory() +{ +} + +std::unique_ptr NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr 
NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +void NeonWorkloadFactory::Finalize() +{} + +void NeonWorkloadFactory::Release() +{} + +void NeonWorkloadFactory::Acquire() +{} + +#endif + +} //namespace armnn diff --git a/src/backends/NeonWorkloadFactory.hpp b/src/backends/NeonWorkloadFactory.hpp new file mode 100644 index 0000000000..a981855314 --- /dev/null +++ b/src/backends/NeonWorkloadFactory.hpp @@ -0,0 +1,135 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "OutputHandler.hpp" + +#include "memory/BaseMemoryManager.hpp" + +#include +#include + +namespace armnn +{ + +// Neon workload factory. 
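As a rough illustration of how this factory is driven, the sketch below creates tensor handles and an activation workload by hand. The real wiring in ArmNN goes through Layer/OutputHandler and the loaded network; the descriptor set-up, tensor shape, include paths and function name here are simplified assumptions, not part of this patch.

#include "backends/NeonWorkloadFactory.hpp"  // paths assume the src/ layout introduced by this patch
#include "backends/WorkloadData.hpp"
#include <armnn/Tensor.hpp>
#include <memory>

void SketchNeonWorkloadFactoryUsage(armnn::NeonWorkloadFactory& factory)
{
    const armnn::TensorInfo info(armnn::TensorShape({1, 8}), armnn::DataType::Float32);

    // Backend-specific handles (NeonTensorHandle when built with ARMCOMPUTENEON_ENABLED).
    std::unique_ptr<armnn::ITensorHandle> input  = factory.CreateTensorHandle(info);
    std::unique_ptr<armnn::ITensorHandle> output = factory.CreateTensorHandle(info);

    armnn::ActivationQueueDescriptor descriptor;
    descriptor.m_Parameters.m_Function = armnn::ActivationFunction::ReLu;
    descriptor.m_Inputs.push_back(input.get());
    descriptor.m_Outputs.push_back(output.get());

    armnn::WorkloadInfo workloadInfo;
    workloadInfo.m_InputTensorInfos.push_back(info);
    workloadInfo.m_OutputTensorInfos.push_back(info);

    // Returns nullptr when built without Arm Compute NEON (see the stub implementations above).
    std::unique_ptr<armnn::IWorkload> workload = factory.CreateActivation(descriptor, workloadInfo);

    if (workload)
    {
        // Handles are allocated after workload creation, matching the configure-then-allocate order.
        input->Allocate();
        output->Allocate();
        workload->Execute();
    }
}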
+class NeonWorkloadFactory : public IWorkloadFactory +{ +public: + NeonWorkloadFactory(); + + virtual Compute GetCompute() const override { return Compute::CpuAcc; } + + static bool IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const override { return true; } + + virtual std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override; + + virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFloor(const FloorQueueDescriptor& descriptor, + const 
WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& Info) const override; + + virtual std::unique_ptr CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual void Finalize() override; + + virtual void Release() override; + + virtual void Acquire() override; + +private: +#ifdef ARMCOMPUTENEON_ENABLED + mutable NeonMemoryManager m_MemoryManager; +#endif +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloadUtils.cpp b/src/backends/NeonWorkloadUtils.cpp new file mode 100644 index 0000000000..878280161a --- /dev/null +++ b/src/backends/NeonWorkloadUtils.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "NeonWorkloadUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/NeonTensorHandle.hpp" + +#include "armnn/Utils.hpp" +#include "armnn/Exceptions.hpp" + +#include +#include +#include +#include + +#include "Profiling.hpp" + +#include "NeonLayerSupport.hpp" +#include "armnn/Types.hpp" +#include "Half.hpp" + +using namespace armnn::armcomputetensorutils; + +namespace armnn +{ + +// Allocates a tensor and copy the contents in data to the tensor contents. +template +void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data) +{ + InitialiseArmComputeTensorEmpty(tensor); + CopyArmComputeITensorData(data, tensor); +} + +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const Half* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const float* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const uint8_t* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const int32_t* data); + +void InitializeArmComputeTensorDataForFloatTypes(arm_compute::Tensor& tensor, + const ConstCpuTensorHandle* handle) +{ + BOOST_ASSERT(handle); + switch(handle->GetTensorInfo().GetDataType()) + { + case DataType::Float16: + InitialiseArmComputeTensorData(tensor, handle->GetConstTensor()); + break; + case DataType::Float32: + InitialiseArmComputeTensorData(tensor, handle->GetConstTensor()); + break; + default: + BOOST_ASSERT_MSG(false, "Unexpected floating point type."); + } +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloadUtils.hpp b/src/backends/NeonWorkloadUtils.hpp new file mode 100644 index 0000000000..15f9e3badf --- /dev/null +++ b/src/backends/NeonWorkloadUtils.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Workload.hpp" + +#include "backends/NeonTensorHandle.hpp" +#include "NeonTimer.hpp" + +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/NEON/NEFunctions.h" +#include + +#include + +namespace armnn +{ +class Layer; + +template +void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data); + +void InitializeArmComputeTensorDataForFloatTypes(arm_compute::Tensor& tensor, const ConstCpuTensorHandle* handle); +} //namespace armnn + + +#define ARMNN_SCOPED_PROFILING_EVENT_NEON(name) \ + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, \ + name, \ + armnn::WallClockTimer(), \ + armnn::NeonTimer()) diff --git a/src/backends/NeonWorkloads.hpp b/src/backends/NeonWorkloads.hpp new file mode 100644 index 0000000000..676c23cc4d --- /dev/null +++ b/src/backends/NeonWorkloads.hpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once +#include "backends/NeonWorkloads/NeonActivationFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonActivationUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonBaseConstantWorkload.hpp" +#include "backends/NeonWorkloads/NeonBaseMergerWorkload.hpp" +#include "backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp" +#include "backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonConstantFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonConstantUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp" +#include "backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp" +#include "backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp" +#include "backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonFloorFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonLstmFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonMergerFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonMergerUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonPermuteWorkload.hpp" +#include "backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp" +#include "backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonReshapeUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp" +#include "backends/NeonWorkloads/NeonSplitterUint8Workload.hpp" +#include "backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp" diff --git a/src/backends/NeonWorkloads/CMakeLists.txt b/src/backends/NeonWorkloads/CMakeLists.txt new file mode 100644 index 0000000000..02cb53dff1 --- 
/dev/null +++ b/src/backends/NeonWorkloads/CMakeLists.txt @@ -0,0 +1,83 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +list(APPEND armnnNeonBackend_sources + NeonActivationFloatWorkload.cpp + NeonActivationFloatWorkload.hpp + NeonActivationUint8Workload.cpp + NeonActivationUint8Workload.hpp + NeonAdditionFloatWorkload.cpp + NeonAdditionFloatWorkload.hpp + NeonBaseConstantWorkload.hpp + NeonBaseMergerWorkload.hpp + NeonBaseSplitterWorkload.hpp + NeonBatchNormalizationFloatWorkload.cpp + NeonBatchNormalizationFloatWorkload.hpp + NeonConstantFloatWorkload.cpp + NeonConstantFloatWorkload.hpp + NeonConstantUint8Workload.cpp + NeonConstantUint8Workload.hpp + NeonConvertFp16ToFp32Workload.cpp + NeonConvertFp16ToFp32Workload.hpp + NeonConvertFp32ToFp16Workload.cpp + NeonConvertFp32ToFp16Workload.hpp + NeonConvolution2dBaseWorkload.cpp + NeonConvolution2dBaseWorkload.hpp + NeonConvolution2dFloatWorkload.cpp + NeonConvolution2dFloatWorkload.hpp + NeonConvolution2dUint8Workload.cpp + NeonConvolution2dUint8Workload.hpp + NeonDepthwiseConvolutionBaseWorkload.cpp + NeonDepthwiseConvolutionBaseWorkload.hpp + NeonDepthwiseConvolutionFloatWorkload.cpp + NeonDepthwiseConvolutionFloatWorkload.hpp + NeonDepthwiseConvolutionUint8Workload.cpp + NeonDepthwiseConvolutionUint8Workload.hpp + NeonFloorFloatWorkload.cpp + NeonFloorFloatWorkload.hpp + NeonFullyConnectedFloatWorkload.cpp + NeonFullyConnectedFloatWorkload.hpp + NeonL2NormalizationFloatWorkload.cpp + NeonL2NormalizationFloatWorkload.hpp + NeonLstmFloatWorkload.cpp + NeonLstmFloatWorkload.hpp + NeonMergerFloatWorkload.cpp + NeonMergerFloatWorkload.hpp + NeonMergerUint8Workload.cpp + NeonMergerUint8Workload.hpp + NeonMultiplicationFloatWorkload.cpp + NeonMultiplicationFloatWorkload.hpp + NeonNormalizationFloatWorkload.cpp + NeonNormalizationFloatWorkload.hpp + NeonPermuteWorkload.cpp + NeonPermuteWorkload.hpp + NeonPooling2dBaseWorkload.cpp + NeonPooling2dBaseWorkload.hpp + NeonPooling2dFloatWorkload.cpp + NeonPooling2dFloatWorkload.hpp + NeonPooling2dUint8Workload.cpp + NeonPooling2dUint8Workload.hpp + NeonReshapeFloatWorkload.cpp + NeonReshapeFloatWorkload.hpp + NeonReshapeUint8Workload.cpp + NeonReshapeUint8Workload.hpp + NeonSoftmaxBaseWorkload.cpp + NeonSoftmaxBaseWorkload.hpp + NeonSoftmaxFloatWorkload.cpp + NeonSoftmaxFloatWorkload.hpp + NeonSoftmaxUint8Workload.cpp + NeonSoftmaxUint8Workload.hpp + NeonSplitterFloatWorkload.cpp + NeonSplitterFloatWorkload.hpp + NeonSplitterUint8Workload.cpp + NeonSplitterUint8Workload.hpp + NeonSubtractionFloatWorkload.cpp + NeonSubtractionFloatWorkload.hpp +) + +add_library(armnnNeonBackend STATIC ${armnnNeonBackend_sources}) +target_include_directories(armnnNeonBackend PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnNeonBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnNeonBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp new file mode 100644 index 0000000000..bedf3dcb02 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp @@ -0,0 +1,57 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonActivationFloatWorkload.hpp" +#include "backends/ArmComputeUtils.hpp" + + +namespace armnn +{ + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); + + if (input.GetDataType() == DataType::QuantisedAsymm8 && + activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "Neon: Logistic Activations unsupported with QAsymm8 data type."}; + } + + return arm_compute::NEActivationLayer::validate(&aclInput, + &aclOutput, + activationLayerInfo); +} + +NeonActivationFloatWorkload::NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonActivationFloatWorkload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void NeonActivationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationFloatWorkload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp new file mode 100644 index 0000000000..f8d25ca47d --- /dev/null +++ b/src/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor); + +class NeonActivationFloatWorkload : public FloatWorkload +{ +public: + NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; +} //namespace armnn + + + diff --git a/src/backends/NeonWorkloads/NeonActivationUint8Workload.cpp b/src/backends/NeonWorkloads/NeonActivationUint8Workload.cpp new file mode 100644 index 0000000000..a9b94d2916 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonActivationUint8Workload.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
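The activation workload above exposes a standalone NeonActivationWorkloadValidate() that is consulted before any kernel is configured, so unsupported combinations (such as a logistic activation on QAsymm8 data) are rejected with a status rather than a configure-time failure. A hedged sketch of that validate-before-configure flow, with Status, ActivationFunction and MakeActivationWorkload as hypothetical names:

// Hedged sketch of a validate-before-configure flow; Status, ActivationFunction
// and MakeActivationWorkload are hypothetical names, not the ArmNN API.
#include <memory>
#include <stdexcept>
#include <string>

struct Status
{
    bool ok;
    std::string error;
};

enum class ActivationFunction { ReLu, Logistic };
enum class DataType { Float32, QuantisedAsymm8 };

// Reject unsupported combinations up front instead of failing inside the
// backend kernel's configure() call.
Status ValidateActivation(DataType input, ActivationFunction function)
{
    if (input == DataType::QuantisedAsymm8 && function == ActivationFunction::Logistic)
    {
        return {false, "Logistic activation unsupported with QAsymm8 data type."};
    }
    return {true, {}};
}

struct ActivationWorkload
{
    // configure() would map the descriptor onto the backend kernel here.
};

std::unique_ptr<ActivationWorkload> MakeActivationWorkload(DataType input,
                                                           ActivationFunction function)
{
    const Status status = ValidateActivation(input, function);
    if (!status.ok)
    {
        throw std::runtime_error(status.error);
    }
    return std::make_unique<ActivationWorkload>();
}

int main()
{
    auto workload = MakeActivationWorkload(DataType::Float32, ActivationFunction::ReLu);
    return workload ? 0 : 1;
}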
+// SPDX-License-Identifier: MIT +// + +#include "NeonActivationUint8Workload.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/NeonLayerSupport.hpp" + +namespace armnn +{ +NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function); + arm_compute::ActivationLayerInfo layerInfo(activation, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + m_Data.ValidateInputsOutputs("NeonActivationUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, layerInfo); +} + +void NeonActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationUint8Workload_Execute"); + + m_ActivationLayer.run(); +} +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonActivationUint8Workload.hpp b/src/backends/NeonWorkloads/NeonActivationUint8Workload.hpp new file mode 100644 index 0000000000..405e600691 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonActivationUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonActivationUint8Workload : public Uint8Workload +{ +public: + NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; + +} //namespace armnn + + + + + diff --git a/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp new file mode 100644 index 0000000000..adc34e91c4 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
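The Uint8 activation workload above downcasts its tensor handles with static_cast, while the float variant uses boost::polymorphic_downcast, which adds a dynamic_cast verification when assertions are enabled. A small sketch of that checked-downcast idea follows; checked_downcast and the handle types here are illustrative, not the Boost implementation.

// Sketch of a checked downcast in the spirit of boost::polymorphic_downcast:
// static_cast in release builds, verified with dynamic_cast when asserts are on.
// checked_downcast and NeonTensorHandle here are illustrative names.
#include <cassert>

struct ITensorHandle
{
    virtual ~ITensorHandle() = default;
};

struct NeonTensorHandle : ITensorHandle
{
    int tensor = 0;  // stand-in for the wrapped arm_compute tensor
};

template <typename DerivedPtr, typename Base>
DerivedPtr checked_downcast(Base* base)
{
    // In debug builds the dynamic type is verified; in NDEBUG builds this is
    // exactly a static_cast, which is why the cast must be known to be safe.
    assert(dynamic_cast<DerivedPtr>(base) == static_cast<DerivedPtr>(base));
    return static_cast<DerivedPtr>(base);
}

int main()
{
    NeonTensorHandle handle;
    ITensorHandle* base = &handle;
    NeonTensorHandle* neon = checked_downcast<NeonTensorHandle*>(base);
    neon->tensor = 42;
    return neon->tensor == 42 ? 0 : 1;
}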
+// SPDX-License-Identifier: MIT +// + +#include "NeonAdditionFloatWorkload.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NEArithmeticAddition::validate(&aclInput0, + &aclInput1, + &aclOutput, + arm_compute::ConvertPolicy::SATURATE); +} + + +NeonAdditionFloatWorkload::NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonAdditionFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_AddLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); +} + +void NeonAdditionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionFloatWorkload_Execute"); + m_AddLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp new file mode 100644 index 0000000000..154e4f33d1 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonAdditionFloatWorkload : public FloatWorkload +{ +public: + NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEArithmeticAddition m_AddLayer; +}; + +} //namespace armnn + + + diff --git a/src/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp b/src/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp new file mode 100644 index 0000000000..f4a09d4aed --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp @@ -0,0 +1,83 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "Half.hpp" + +namespace armnn +{ + +// Base class template providing an implementation of the Constant layer common to all data types. +template +class NeonBaseConstantWorkload : public TypedWorkload +{ +public: + NeonBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload(descriptor, info) + , m_RanOnce(false) + { + } + + virtual void Execute() const override + { + using namespace armcomputetensorutils; + + // The intermediate tensor held by the corresponding layer output handler can be initialised with the + // given data on the first inference, then reused for subsequent inferences. 
+ // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer + // may not have been configured at the time. + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::ITensor& output = + boost::polymorphic_downcast(data.m_Outputs[0])->GetTensor(); + arm_compute::DataType computeDataType = + boost::polymorphic_downcast(data.m_Outputs[0])->GetDataType(); + + switch (computeDataType) + { + case arm_compute::DataType::F16: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + case arm_compute::DataType::F32: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + case arm_compute::DataType::QASYMM8: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } + } + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp b/src/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp new file mode 100644 index 0000000000..603e7f3544 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ +// Base class template providing an implementation of the Merger layer common to all data types. +template +class NeonBaseMergerWorkload : public TypedWorkload +{ +public: + using TypedWorkload::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, merger is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp b/src/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp new file mode 100644 index 0000000000..9288d4427e --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +// Base class template providing an implementation of the Splitter layer common to all data types. +template +class NeonBaseSplitterWorkload : public TypedWorkload +{ +public: + using TypedWorkload::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, splitter is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..6f5c948084 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp @@ -0,0 +1,96 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
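The constant workload above defers copying the layer output into the ACL tensor until the first Execute() call, tracked through a mutable m_RanOnce flag because Execute() is const. A minimal sketch of the same run-once initialisation, with ConstantWorkload as an illustrative stand-in:

// Sketch of the run-once initialisation used by the constant workload above:
// the payload is copied into the output on the first Execute() call only.
#include <utility>
#include <vector>

class ConstantWorkload
{
public:
    ConstantWorkload(std::vector<float> payload, std::vector<float>& output)
        : m_Payload(std::move(payload)), m_Output(output), m_RanOnce(false) {}

    // Execute() is const in the workload interface, so the first-run flag has
    // to be mutable, just as in the workload above.
    void Execute() const
    {
        if (!m_RanOnce)
        {
            m_Output.assign(m_Payload.begin(), m_Payload.end());
            m_RanOnce = true;
        }
        // Subsequent inferences reuse the already-initialised output tensor.
    }

private:
    std::vector<float> m_Payload;
    std::vector<float>& m_Output;
    mutable bool m_RanOnce;
};

int main()
{
    std::vector<float> output;
    ConstantWorkload constant({1.0f, 2.0f, 3.0f}, output);
    constant.Execute();  // first run copies the payload
    constant.Execute();  // later runs are no-ops
    return output.size() == 3 ? 0 : 1;
}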
+// SPDX-License-Identifier: MIT +// + +#include "NeonBatchNormalizationFloatWorkload.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "armnn/ArmNN.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + + +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); + const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); + const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); + const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); + + return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo, + &aclOutputInfo, + &aclMeanInfo, + &aclVarInfo, + &aclBetaInfo, + &aclGammaInfo, + descriptor.m_Eps); +} + +NeonBatchNormalizationFloatWorkload::NeonBatchNormalizationFloatWorkload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonBatchNormalizationFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_Mean = std::make_unique(); + BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); + + m_Variance = std::make_unique(); + BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); + + m_Gamma = std::make_unique(); + BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); + + m_Beta = std::make_unique(); + BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Layer.configure(&input, + &output, + m_Mean.get(), + m_Variance.get(), + m_Beta.get(), + m_Gamma.get(), + m_Data.m_Parameters.m_Eps); + + InitializeArmComputeTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean); + InitializeArmComputeTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance); + InitializeArmComputeTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma); + InitializeArmComputeTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta); + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_Layer.prepare(); + FreeUnusedTensors(); +} + +void NeonBatchNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +void NeonBatchNormalizationFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_Mean); + FreeTensorIfUnused(m_Variance); + FreeTensorIfUnused(m_Gamma); + FreeTensorIfUnused(m_Beta); +} + +} //namespace armnn + + diff --git a/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..7982541748 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor); + +class NeonBatchNormalizationFloatWorkload : public FloatWorkload +{ +public: + NeonBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEBatchNormalizationLayer m_Layer; + + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Gamma; + std::unique_ptr m_Beta; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + diff --git a/src/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp new file mode 100644 index 0000000000..dbdd057101 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConstantFloatWorkload.hpp" + +namespace armnn +{ + +void NeonConstantFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantFloatWorkload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp new file mode 100644 index 0000000000..c35b5fda3e --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantFloatWorkload : public NeonBaseConstantWorkload +{ +public: + using NeonBaseConstantWorkload::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConstantUint8Workload.cpp b/src/backends/NeonWorkloads/NeonConstantUint8Workload.cpp new file mode 100644 index 0000000000..c607d86844 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConstantUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConstantUint8Workload.hpp" + +namespace armnn +{ + +void NeonConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantUint8Workload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConstantUint8Workload.hpp b/src/backends/NeonWorkloads/NeonConstantUint8Workload.hpp new file mode 100644 index 0000000000..2cb9516afe --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantUint8Workload : public NeonBaseConstantWorkload +{ +public: + using NeonBaseConstantWorkload::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp b/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..86ec31c71d --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvertFp16ToFp32Workload.hpp" +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float16ToFloat32Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast(src); + auto output = reinterpret_cast(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp b/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..d70401b5f2 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" +#include "backends/NeonWorkloadUtils.hpp" + +namespace armnn +{ + +class NeonConvertFp16ToFp32Workload : public Float16ToFloat32Workload +{ +public: + NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair; + std::vector m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp b/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..0f4fbe4e93 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
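The convert function above treats the source buffer as raw bytes and divides the byte count by two because each fp16 element occupies two bytes. The sketch below shows, at the bit level, what a half-to-float conversion such as armnnUtils::FloatingPointConverter::ConvertFloat16To32 has to do (sign, exponent re-bias, mantissa widening, plus the subnormal and Inf/NaN cases); it is a generic illustration, not the ArmNN implementation.

// Generic illustration of an fp16-to-fp32 conversion; not the ArmNN code.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

float HalfToFloat(uint16_t h)
{
    const uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
    uint32_t exp  = (h >> 10) & 0x1Fu;
    uint32_t mant = h & 0x3FFu;
    uint32_t bits;

    if (exp == 0)
    {
        if (mant == 0)
        {
            bits = sign;                                   // signed zero
        }
        else
        {
            exp = 127 - 15 + 1;                            // renormalise a subnormal
            while ((mant & 0x400u) == 0) { mant <<= 1; --exp; }
            mant &= 0x3FFu;
            bits = sign | (exp << 23) | (mant << 13);
        }
    }
    else if (exp == 0x1Fu)
    {
        bits = sign | 0x7F800000u | (mant << 13);          // infinity or NaN
    }
    else
    {
        bits = sign | ((exp - 15 + 127) << 23) | (mant << 13);
    }

    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

// Each fp16 element is two bytes, which is why the workload divides the byte
// count of the buffer by two to get the element count.
void ConvertFloat16To32(const uint16_t* src, size_t numElements, float* dst)
{
    for (size_t i = 0; i < numElements; ++i)
    {
        dst[i] = HalfToFloat(src[i]);
    }
}

int main()
{
    const uint16_t halves[2] = {0x3C00u, 0xC000u};  // 1.0f and -2.0f in fp16
    float floats[2];
    ConvertFloat16To32(halves, 2, floats);
    std::printf("%f %f\n", floats[0], floats[1]);
    return 0;
}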
+// SPDX-License-Identifier: MIT +// + +#include "NeonConvertFp32ToFp16Workload.hpp" + +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "Profiling.hpp" +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32ToFloat16Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp32ToFp16Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast(src); + auto output = reinterpret_cast(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp b/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..eb839fdd9d --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" +#include "backends/NeonWorkloadUtils.hpp" + +namespace armnn +{ + +class NeonConvertFp32ToFp16Workload : public Float32ToFloat16Workload +{ +public: + NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair; + std::vector m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp new file mode 100644 index 0000000000..0e9894ce78 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp @@ -0,0 +1,146 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/NeonLayerSupport.hpp" + +#include "NeonConvolution2dBaseWorkload.hpp" + +#include "armnn/Types.hpp" +#include "Half.hpp" + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get()); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); + + return arm_compute::NEConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + layerInfo); +} + +template +NeonConvolution2dBaseWorkload::NeonConvolution2dBaseWorkload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : TypedWorkload(descriptor, info) +{ + using arm_compute::NEDirectConvolutionLayer; + + ValidateData(); + + // todo: check tensor shapes match. + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + const bool preferDirectConvolution = + IsNeonDirectConvolutionPreferred(m_Data.m_Weight->GetTensorInfo(), + m_Data.m_Parameters); + + if (preferDirectConvolution) + { + auto directConvolutionLayer = std::make_unique(memoryManager); + directConvolutionLayer->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + m_ConvolutionLayer.reset(directConvolutionLayer.release()); + } + else + { + auto convolutionLayer = std::make_unique(memoryManager); + convolutionLayer->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + m_ConvolutionLayer.reset(convolutionLayer.release()); + } + BOOST_ASSERT(m_ConvolutionLayer); + + armnn::DataType dataType = m_Data.m_Weight->GetTensorInfo().GetDataType(); + + switch (dataType) + { + case DataType::Float16: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + break; + } + case DataType::Float32: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + break; + } + case DataType::QuantisedAsymm8: + { + 
InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown DataType."); + } + } +} + +template +void NeonConvolution2dBaseWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +// Generates known implementations for linker. +template class NeonConvolution2dBaseWorkload; +template class NeonConvolution2dBaseWorkload; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp new file mode 100644 index 0000000000..77d90cd84b --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/NeonWorkloadUtils.hpp" +#include "backends/Workload.hpp" + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +#include + +namespace armnn +{ + +arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases); + +template +class NeonConvolution2dBaseWorkload : public TypedWorkload +{ +public: + using TypedWorkload::m_Data; + + NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + + virtual void ValidateData() const {}; + +protected: + std::unique_ptr m_ConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp new file mode 100644 index 0000000000..ca7a0c575a --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
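The convolution base workload above holds its kernel behind a single pointer to an abstract Compute Library function and picks either the direct convolution or the general (GEMM-based) convolution at construction time. The sketch below shows that selection pattern behind a common interface; IConvFunction, DirectConv, GemmConv and PreferDirect are illustrative stand-ins for the Compute Library function classes and the IsNeonDirectConvolutionPreferred() heuristic.

// Sketch of the layer-selection pattern used by the convolution base workload:
// two kernel implementations behind one interface pointer, picked by a heuristic.
#include <memory>

struct ConvParams
{
    unsigned kernelWidth;
    unsigned kernelHeight;
    unsigned stride;
};

class IConvFunction
{
public:
    virtual ~IConvFunction() = default;
    virtual void Run() = 0;
};

class DirectConv : public IConvFunction
{
public:
    void Run() override { /* direct convolution kernel */ }
};

class GemmConv : public IConvFunction
{
public:
    void Run() override { /* im2col + GEMM convolution */ }
};

// Hypothetical heuristic: small kernels with unit stride favour the direct path.
bool PreferDirect(const ConvParams& p)
{
    return p.kernelWidth <= 3 && p.kernelHeight <= 3 && p.stride == 1;
}

std::unique_ptr<IConvFunction> MakeConvolution(const ConvParams& p)
{
    if (PreferDirect(p))
    {
        return std::make_unique<DirectConv>();
    }
    return std::make_unique<GemmConv>();
}

int main()
{
    auto convolution = MakeConvolution({3, 3, 1});
    convolution->Run();
    return 0;
}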
+// SPDX-License-Identifier: MIT +// + +#include "NeonConvolution2dFloatWorkload.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/NeonLayerSupport.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonConvolution2dFloatWorkload::NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) +{ + if (m_Data.m_Parameters.m_BiasEnabled) + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonConvolution2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dFloatWorkload_Execute"); + m_ConvolutionLayer->run(); +} + +void NeonConvolution2dFloatWorkload::ValidateData() const +{ + m_Data.ValidateInputsOutputs("NeonConvolution2dFloatWorkload", 1, 1); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp new file mode 100644 index 0000000000..dd8ef55f43 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonConvolution2dBaseWorkload.hpp" +#include + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ + +class NeonConvolution2dFloatWorkload : public NeonConvolution2dBaseWorkload +{ +public: + NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + + void Execute() const override; + void ValidateData() const override; +}; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp b/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..5affe682b4 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvolution2dUint8Workload.hpp" + +namespace armnn +{ + +NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) +{ + if (m_Data.m_Parameters.m_BiasEnabled) + { + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor()); + } + + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dUint8Workload_Execute"); + m_ConvolutionLayer->run(); +} + +void NeonConvolution2dUint8Workload::ValidateData() const +{ + m_Data.ValidateInputsOutputs("NeonConvolution2dUint8Workload", 1, 1); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp b/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..ef60fc3e84 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonConvolution2dBaseWorkload.hpp" + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ + +class NeonConvolution2dUint8Workload : public NeonConvolution2dBaseWorkload +{ +public: + NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + + virtual void ValidateData() const override; + virtual void Execute() const override; +private: +}; + +} //namespace armnnn + diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp new file mode 100644 index 0000000000..e79e14f2ed --- /dev/null +++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionBaseWorkload.hpp" + +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases) +{ + const arm_compute::TensorInfo aclInputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeightsInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = armcomputetensorutils::BuildArmComputeTensorInfo(biases.get()); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + const arm_compute::PadStrideInfo aclPadStrideInfo = + armcomputetensorutils::BuildArmComputePadStrideInfo(descriptor); + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + aclPadStrideInfo, + aclDepthMultiplier); +} + +} \ No newline at end of file diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp new file mode 100644 index 0000000000..eec432be86 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/NeonWorkloadUtils.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases); + +} // namespace armnn diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp new file mode 100644 index 0000000000..1ec1417a58 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp @@ -0,0 +1,94 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionFloatWorkload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionFloatWorkload::NeonDepthwiseConvolutionFloatWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionFloatWorkload", 1, 1); + + arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitializeArmComputeTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonDepthwiseConvolutionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionFloatWorkload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +void NeonDepthwiseConvolutionFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp new file mode 100644 index 0000000000..4ec8c1dc37 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
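The depthwise workload above finishes construction with prepare() followed by FreeUnusedTensors(), so staging tensors whose contents the Compute Library has already consumed are released before the first inference. A minimal sketch of the free-if-unused idea, assuming a tensor type that can report whether a prepared function still references it (StagingTensor and is_used() are illustrative):

// Minimal sketch of the free-if-unused step that follows prepare() above.
#include <memory>

struct StagingTensor
{
    bool stillReferenced = false;
    bool is_used() const { return stillReferenced; }
};

// Release the backing tensor only once the prepared function no longer needs it.
template <typename Tensor>
void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor)
{
    if (tensor && !tensor->is_used())
    {
        tensor.reset();
    }
}

int main()
{
    auto kernel = std::make_unique<StagingTensor>();
    std::unique_ptr<StagingTensor> bias;        // bias may be absent

    // ... configure(), tensor initialisation and prepare() would run here ...

    FreeTensorIfUnused(kernel);  // released: nothing references it any more
    FreeTensorIfUnused(bias);    // safe no-op on an empty pointer
    return kernel == nullptr ? 0 : 1;
}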
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonDepthwiseConvolutionFloatWorkload : public FloatWorkload +{ +public: + NeonDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable std::unique_ptr m_pDepthwiseConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + + diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..b7813a59c5 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,94 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor()); + + if (m_BiasTensor) + { + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor()); + } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonDepthwiseConvolutionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionUint8Workload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +void NeonDepthwiseConvolutionUint8Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git 
a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp new file mode 100644 index 0000000000..a0be512f9b --- /dev/null +++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonDepthwiseConvolutionUint8Workload : public Uint8Workload +{ +public: + NeonDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable std::unique_ptr m_pDepthwiseConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp new file mode 100644 index 0000000000..a08ba8a6ec --- /dev/null +++ b/src/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonFloorFloatWorkload.hpp" + +namespace armnn +{ +NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonFloorFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonFloorFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute"); + m_Layer.run(); +} +} //namespace armnn + + + diff --git a/src/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp new file mode 100644 index 0000000000..ad9f44bbf9 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonFloorFloatWorkload : public FloatWorkload +{ +public: + NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFloor m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp new file mode 100644 index 0000000000..2036ecb203 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp @@ -0,0 +1,96 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonFullyConnectedFloatWorkload.hpp" + +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiases; + arm_compute::TensorInfo *optionalAclBiases = nullptr; + if (descriptor.m_BiasEnabled) + { + aclBiases = BuildArmComputeTensorInfo(biases); + optionalAclBiases = &aclBiases; + } + + const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + + + return arm_compute::NEFullyConnectedLayer::validate(&aclInput, + &aclWeights, + optionalAclBiases, + &aclOutput, + fullyConnectedLayerInfo); +} + +NeonFullyConnectedFloatWorkload::NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_FullyConnectedLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonFullyConnectedFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_WeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasesTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + } + + // Construct + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + + // Allocate + InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); + + if (m_BiasesTensor) + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_FullyConnectedLayer.prepare(); + FreeUnusedTensors(); +} + +void NeonFullyConnectedFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedFloatWorkload_Execute"); + m_FullyConnectedLayer.run(); +} + +void NeonFullyConnectedFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_WeightsTensor); + FreeTensorIfUnused(m_BiasesTensor); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp new file mode 100644 index 0000000000..27e5717b04 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
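NeonFullyConnectedWorkloadValidate above only builds an ACL bias description when the descriptor enables bias, and otherwise passes a null pointer for it. A small sketch of that optional-bias handling, using std::optional in place of boost::optional; TensorDesc and ValidateFullyConnected are hypothetical names:

// Sketch of optional-bias handling: the bias description is only built,
// and only passed, when bias is enabled.
#include <cstdio>
#include <optional>

struct TensorDesc
{
    unsigned elements;
};

bool ValidateFullyConnected(const TensorDesc& input,
                            const TensorDesc& weights,
                            const TensorDesc* optionalBias)
{
    // A null bias pointer means "no bias"; otherwise the bias must be non-empty.
    if (optionalBias != nullptr && optionalBias->elements == 0)
    {
        return false;
    }
    return input.elements > 0 && weights.elements > 0;
}

int main()
{
    const TensorDesc input{128};
    const TensorDesc weights{128 * 10};

    std::optional<TensorDesc> bias;             // bias disabled in the descriptor
    const bool ok = ValidateFullyConnected(input, weights,
                                           bias ? &bias.value() : nullptr);
    std::printf("valid: %s\n", ok ? "yes" : "no");
    return ok ? 0 : 1;
}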
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ + +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor); + +class NeonFullyConnectedFloatWorkload : public FloatWorkload +{ +public: + NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer; + + std::unique_ptr m_WeightsTensor; + std::unique_ptr m_BiasesTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..7296e67179 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonL2NormalizationFloatWorkload.hpp" +#include "backends/ArmComputeUtils.hpp" + +namespace armnn +{ + +arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + CreateAclNormalizationLayerInfoForL2Normalization(input); + + return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_Layer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); +} + +void NeonL2NormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..078c4d140f --- /dev/null +++ b/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ + +arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output); + +class NeonL2NormalizationFloatWorkload : public FloatWorkload +{ +public: + NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + virtual void Execute() const override; + +private: + // Purposely not a NEL2Normalize function. See constructor. + mutable arm_compute::NENormalizationLayer m_Layer; +}; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp new file mode 100644 index 0000000000..8b2b58d9b1 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonLstmFloatWorkload.hpp" + +namespace armnn +{ +NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonLstmFloatWorkload", 1, 1); +} + +void NeonLstmFloatWorkload::Execute() const +{ + throw armnn::Exception("No implementation of Lstm in the Neon backend!"); +} + +} // namespace armnn diff --git a/src/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp new file mode 100644 index 0000000000..6064a017f9 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonLstmFloatWorkload : public FloatWorkload +{ +public: + NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp new file mode 100644 index 0000000000..79039aa51a --- /dev/null +++ b/src/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonMergerFloatWorkload.hpp" + +namespace armnn +{ + +void NeonMergerFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerFloatWorkload_Execute"); + NeonBaseMergerWorkload::Execute(); +} + +} // namespace armnn diff --git a/src/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp new file mode 100644 index 0000000000..e7088b8c2f --- /dev/null +++ b/src/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseMergerWorkload.hpp" + +namespace armnn +{ + +class NeonMergerFloatWorkload : public NeonBaseMergerWorkload +{ +public: + using NeonBaseMergerWorkload::NeonBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonMergerUint8Workload.cpp b/src/backends/NeonWorkloads/NeonMergerUint8Workload.cpp new file mode 100644 index 0000000000..3989702bd3 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonMergerUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonMergerUint8Workload.hpp" + +namespace armnn +{ + +void NeonMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerUint8Workload_Execute"); + NeonBaseMergerWorkload::Execute(); +} + +} // namespace armnn diff --git a/src/backends/NeonWorkloads/NeonMergerUint8Workload.hpp b/src/backends/NeonWorkloads/NeonMergerUint8Workload.hpp new file mode 100644 index 0000000000..73c0fd55ad --- /dev/null +++ b/src/backends/NeonWorkloads/NeonMergerUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseMergerWorkload.hpp" + +namespace armnn +{ + +class NeonMergerUint8Workload : public NeonBaseMergerWorkload +{ +public: + using NeonBaseMergerWorkload::NeonBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp new file mode 100644 index 0000000000..c4241ece19 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonMultiplicationFloatWorkload.hpp" + + +namespace armnn +{ + +arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. 
+ return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1, + &aclInput2, + &aclOutput, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + +NeonMultiplicationFloatWorkload::NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonMultiplicationFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. + m_PixelWiseMultiplication.configure(&input1, + &input2, + &output, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + +void NeonMultiplicationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationFloatWorkload_Execute"); + m_PixelWiseMultiplication.run(); +} + +} //namespace armnn + + diff --git a/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp new file mode 100644 index 0000000000..4b187b2d42 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ +arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonMultiplicationFloatWorkload : public FloatWorkload +{ +public: + NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEPixelWiseMultiplication m_PixelWiseMultiplication; +}; + +} //namespace armnn + + + + diff --git a/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..4534c376d8 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp @@ -0,0 +1,70 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
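To make the scale and rounding arguments above concrete: NEPixelWiseMultiplication computes an element-wise product optionally scaled by a constant, and for F32 data a scale of 1.0f leaves nothing to round, which is why the TO_ZERO policy is harmless here even though configure() insists on it. A plain-C++ restatement of the semantics, illustrative and not ACL code:

#include <cstddef>

// out[i] = in0[i] * in1[i] * scale; with scale == 1.0f and float data there is
// no rounding step, matching the comment above about the rounding policy.
void ReferencePixelWiseMultiply(const float* in0, const float* in1, float* out,
                                std::size_t count, float scale = 1.0f)
{
    for (std::size_t i = 0; i < count; ++i)
    {
        out[i] = in0[i] * in1[i] * scale;
    }
}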
+// SPDX-License-Identifier: MIT +// + +#include "NeonNormalizationFloatWorkload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ + +arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor); + + return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_NormalizationLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1); + std::string reasonIfUnsupported; + if (!IsNeonNormalizationDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters)) + { + throw UnimplementedException(reasonIfUnsupported); + } + + // Input and output tensors have to have the same dimensionality. + if (info.m_InputTensorInfos[0].GetShape()[1] != info.m_OutputTensorInfos[0].GetShape()[1] + || info.m_InputTensorInfos[0].GetShape()[0] != info.m_OutputTensorInfos[0].GetShape()[0] + || info.m_InputTensorInfos[0].GetShape()[3] != info.m_OutputTensorInfos[0].GetShape()[3] + || info.m_InputTensorInfos[0].GetShape()[2] != info.m_OutputTensorInfos[0].GetShape()[2]) + { + throw InvalidArgumentException("Normalization requires input and output tensors to have equal dimensionality."); + } + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + const arm_compute::NormType normType = + ConvertNormalizationAlgorithmChannelToAclNormType(m_Data.m_Parameters.m_NormChannelType); + arm_compute::NormalizationLayerInfo normalizationInfo(normType, + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K, + false); + + m_NormalizationLayer.configure(&input, &output, normalizationInfo); +} + +void NeonNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute"); + m_NormalizationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..633944ddc9 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
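The normalization constructor above builds an arm_compute::NormalizationLayerInfo directly from the ArmNN descriptor fields, with the final is_scaled argument fixed to false. A condensed sketch of that mapping; the helper name is hypothetical and the header paths are assumptions:

#include <arm_compute/core/Types.h>
#include <armnn/Descriptors.hpp>

// Mirrors the descriptor-to-ACL mapping in NeonNormalizationFloatWorkload's
// constructor; normType is whatever the NormChannelType conversion helper returns.
arm_compute::NormalizationLayerInfo MakeAclNormalizationInfo(
    const armnn::NormalizationDescriptor& desc, arm_compute::NormType normType)
{
    return arm_compute::NormalizationLayerInfo(normType,
                                               desc.m_NormSize,
                                               desc.m_Alpha,
                                               desc.m_Beta,
                                               desc.m_K,
                                               false /* is_scaled */);
}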
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +namespace armnn +{ + +arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor); + +class NeonNormalizationFloatWorkload : public FloatWorkload +{ +public: + NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NENormalizationLayer m_NormalizationLayer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/NeonWorkloads/NeonPermuteWorkload.cpp b/src/backends/NeonWorkloads/NeonPermuteWorkload.cpp new file mode 100644 index 0000000000..9bc76ba853 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonPermuteWorkload.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPermuteWorkload.hpp" +#include "backends/NeonTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + const armnn::PermutationVector& mappings = descriptor.m_DimMappings; + + return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo, + armcomputetensorutils::BuildArmComputePermutationVector(mappings)); +} + +template +NeonPermuteWorkload::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + using armcomputetensorutils::BuildArmComputePermutationVector; + + m_Data.ValidateInputsOutputs(GetName(), 1, 1); + + const arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + // Run the layer. + m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); +} + +template +void NeonPermuteWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute"); + m_PermuteFunction.run(); +} + +template class NeonPermuteWorkload; +template class NeonPermuteWorkload; + +} // namespace armnn diff --git a/src/backends/NeonWorkloads/NeonPermuteWorkload.hpp b/src/backends/NeonWorkloads/NeonPermuteWorkload.hpp new file mode 100644 index 0000000000..1fe05b1645 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonPermuteWorkload.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
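For readers unfamiliar with the permutation convention used by NeonPermuteWorkload: the descriptor's m_DimMappings records, for each source dimension i, the destination index it moves to. A standalone illustration below; the NHWC-to-NCHW values are an assumed example rather than something taken from this patch:

#include <array>
#include <cstddef>

// dstShape[mappings[i]] = srcShape[i]: source dimension i lands at index mappings[i].
std::array<unsigned int, 4> PermuteShape(const std::array<unsigned int, 4>& srcShape,
                                         const std::array<unsigned int, 4>& mappings)
{
    std::array<unsigned int, 4> dstShape{};
    for (std::size_t i = 0; i < srcShape.size(); ++i)
    {
        dstShape[mappings[i]] = srcShape[i];
    }
    return dstShape;
}

// Example: PermuteShape({1, 224, 224, 3}, {0, 2, 3, 1}) yields {1, 3, 224, 224}.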
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" +#include "backends/NeonWorkloadUtils.hpp" + +#include +#include + +#include + +namespace armnn +{ +arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, const TensorInfo& output, + const PermuteDescriptor& descriptor); + +template +class NeonPermuteWorkload : public TypedWorkload +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("NeonPermuteWorkload"); + return name; + } + + NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TypedWorkload::m_Data; + mutable arm_compute::NEPermute m_PermuteFunction; +}; + +using NeonPermuteFloatWorkload = NeonPermuteWorkload; +using NeonPermuteUint8Workload = NeonPermuteWorkload; + +} // namespace armnn diff --git a/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp b/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp new file mode 100644 index 0000000000..208d08c4c5 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPooling2dBaseWorkload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/NeonTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); + + return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +template +NeonPooling2dBaseWorkload::NeonPooling2dBaseWorkload( + const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) + : TypedWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); + + m_PoolingLayer.configure(&input, &output, layerInfo); +} + +template class NeonPooling2dBaseWorkload; +template class NeonPooling2dBaseWorkload; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp b/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp new file mode 100644 index 0000000000..77d6bf2f06 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor); + +// Base class template providing an implementation of the Pooling2d layer common to all data types. 
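NeonPermuteWorkload and NeonPooling2dBaseWorkload are class templates whose member definitions live in the .cpp file, so each translation unit ends with explicit instantiations for the data types the backend supports. A generic, self-contained sketch of that arrangement, with example names only:

// The header would declare the template; the .cpp defines it and then pins
// down the supported instantiations so no other type can be linked against.
template <int DataTypeTag>
class ExampleTypedWorkload
{
public:
    void Execute() const { /* data-type-specific work */ }
};

// At the end of the .cpp, as with the Neon permute and pooling workloads above:
template class ExampleTypedWorkload<0>;   // e.g. the float instantiation
template class ExampleTypedWorkload<1>;   // e.g. the uint8 instantiation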
+template +class NeonPooling2dBaseWorkload : public TypedWorkload +{ +public: + using TypedWorkload::m_Data; + + NeonPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, + const std::string& name); + +protected: + mutable arm_compute::NEPoolingLayer m_PoolingLayer; +}; + + +} //namespace armnn + + + + + diff --git a/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp new file mode 100644 index 0000000000..46996b088c --- /dev/null +++ b/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPooling2dFloatWorkload.hpp" + + + +namespace armnn +{ + +NeonPooling2dFloatWorkload::NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : NeonPooling2dBaseWorkload(descriptor, info, + "NeonPooling2dFloatWorkload") +{ +} + +void NeonPooling2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dFloatWorkload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp new file mode 100644 index 0000000000..78a35748bb --- /dev/null +++ b/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include "NeonPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class NeonPooling2dFloatWorkload : public NeonPooling2dBaseWorkload +{ +public: + NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn + + + diff --git a/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp b/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..8f99a2be86 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPooling2dUint8Workload.hpp" + + + +namespace armnn +{ + +NeonPooling2dUint8Workload::NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : NeonPooling2dBaseWorkload(descriptor, info, "NeonPooling2dUint8Workload") +{ +} + +void NeonPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dUint8Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp b/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..d475c5f721 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include "NeonPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class NeonPooling2dUint8Workload : public NeonPooling2dBaseWorkload +{ +public: + NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn + + + + diff --git a/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp new file mode 100644 index 0000000000..2dae9466bb --- /dev/null +++ b/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonReshapeFloatWorkload.hpp" + + + +namespace armnn +{ + +NeonReshapeFloatWorkload::NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReshapeFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonReshapeFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp new file mode 100644 index 0000000000..066765adeb --- /dev/null +++ b/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonReshapeFloatWorkload : public FloatWorkload +{ +public: + NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + mutable arm_compute::NEReshapeLayer m_Layer; +}; + +} //namespace armnn + + + + + diff --git a/src/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp b/src/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp new file mode 100644 index 0000000000..41aa07fe49 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonReshapeUint8Workload.hpp" + + + + +namespace armnn +{ +NeonReshapeUint8Workload::NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReshapeUint8Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeUint8Workload_Execute"); + m_Layer.run(); +} +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp b/src/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp new file mode 100644 index 0000000000..3f7c470323 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. 
All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonReshapeUint8Workload : public Uint8Workload +{ +public: + NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEReshapeLayer m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp b/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp new file mode 100644 index 0000000000..ca9e4f058d --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSoftmaxBaseWorkload.hpp" + +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ + +arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor) +{ + // NOTE: We report 4D Softmax as unsupported until full support is added to ACL + if(input.GetShape().GetNumDimensions() >= 4u) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); + } + + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NESoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp b/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp new file mode 100644 index 0000000000..24910df7c7 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/NeonWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor); + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp new file mode 100644 index 0000000000..92e5139c1a --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSoftmaxFloatWorkload.hpp" + +namespace armnn +{ + +NeonSoftmaxFloatWorkload::NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonSoftmaxFloatWorkload", 1, 1); + + // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions. 
+ arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); +} + +void NeonSoftmaxFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxFloatWorkload_Execute"); + m_SoftmaxLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp new file mode 100644 index 0000000000..47745c658f --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ + +class NeonSoftmaxFloatWorkload : public FloatWorkload +{ +public: + NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp b/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..cff869c9b7 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSoftmaxUint8Workload.hpp" + +namespace armnn +{ + +NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : Uint8Workload(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + const auto outputQuantization = output.info()->quantization_info(); + + if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) + { + throw InvalidArgumentException( + "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported"); + } + + m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); +} + +void NeonSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxUint8Workload_Execute"); + + m_SoftmaxLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp b/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..f894c5a958 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
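The quantization check above is strict for a reason: softmax outputs lie in [0, 1], and a scale of 1/256 with a zero offset is the uint8 mapping whose 256 steps cover that interval exactly. A small plain-C++ illustration of that quantization, not ACL code:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Quantizes a softmax output (expected in [0, 1]) with scale = 1/256, offset = 0.
std::uint8_t QuantizeSoftmaxOutput(float value)
{
    const float scale = 1.0f / 256.0f;
    const int quantized = static_cast<int>(std::round(value / scale));
    return static_cast<std::uint8_t>(std::min(std::max(quantized, 0), 255));
}

// Example: 0.5f maps to 128, and 1.0f saturates to 255 (256 would overflow uint8).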
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +namespace armnn +{ + +class NeonSoftmaxUint8Workload : public Uint8Workload +{ +public: + NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp new file mode 100644 index 0000000000..39ed5b7cbc --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSplitterFloatWorkload.hpp" + +namespace armnn +{ + +void NeonSplitterFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterFloatWorkload_Execute"); + NeonBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp new file mode 100644 index 0000000000..744a4fe216 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class NeonSplitterFloatWorkload : public NeonBaseSplitterWorkload +{ +public: + using NeonBaseSplitterWorkload::NeonBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp b/src/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp new file mode 100644 index 0000000000..4b2cf8fc91 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSplitterUint8Workload.hpp" + +namespace armnn +{ + +void NeonSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterUint8Workload_Execute"); + NeonBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp b/src/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp new file mode 100644 index 0000000000..f219cfaa7d --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class NeonSplitterUint8Workload : public NeonBaseSplitterWorkload +{ +public: + using NeonBaseSplitterWorkload::NeonBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp new file mode 100644 index 0000000000..3f37d82d22 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonSubtractionFloatWorkload.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NEArithmeticSubtraction::validate(&aclInput0, + &aclInput1, + &aclOutput, + arm_compute::ConvertPolicy::SATURATE); +} + +NeonSubtractionFloatWorkload::NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonSubtractionFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_SubLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); +} + +void NeonSubtractionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionFloatWorkload_Execute"); + m_SubLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp new file mode 100644 index 0000000000..18988a35ca --- /dev/null +++ b/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonSubtractionFloatWorkload : public FloatWorkload +{ +public: + NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEArithmeticSubtraction m_SubLayer; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/backend.cmake b/src/backends/NeonWorkloads/backend.cmake new file mode 100644 index 0000000000..f0908c5496 --- /dev/null +++ b/src/backends/NeonWorkloads/backend.cmake @@ -0,0 +1,9 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +if(ARMCOMPUTENEON) + add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/NeonWorkloads) + list(APPEND armnnLibraries armnnNeonBackend) +endif() diff --git a/src/backends/OutputHandler.cpp b/src/backends/OutputHandler.cpp new file mode 100644 index 0000000000..c1be5b7dc4 --- /dev/null +++ b/src/backends/OutputHandler.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include "OutputHandler.hpp" + +#include +#include + +#include "backends/WorkloadFactory.hpp" +#include "backends/WorkloadDataCollector.hpp" +#include "backends/ITensorHandle.hpp" + +namespace armnn +{ + +void OutputHandler::SetTensorInfo(const TensorInfo& tensorInfo) +{ + m_TensorInfo = tensorInfo; + m_bTensorInfoSet = true; +} + +void OutputHandler::CreateTensorHandles(const IWorkloadFactory& factory) +{ + m_TensorHandle = factory.CreateTensorHandle(m_TensorInfo); +} + +void OutputHandler::CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const +{ + dataCollector.Push(m_TensorHandle.get(), m_TensorInfo); +} + +} // namespace armnn diff --git a/src/backends/OutputHandler.hpp b/src/backends/OutputHandler.hpp new file mode 100644 index 0000000000..dfc01844c9 --- /dev/null +++ b/src/backends/OutputHandler.hpp @@ -0,0 +1,63 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "backends/WorkloadDataFwd.hpp" + +#include +#include + +#include +#include + +#include + +#include "armnn/INetwork.hpp" +#include "armnn/Types.hpp" +#include "armnn/Descriptors.hpp" +#include "armnn/Tensor.hpp" +#include "ITensorHandle.hpp" + +namespace armnn +{ + +class ITensorHandle; +class IWorkloadFactory; +class OutputSlot; +class WorkloadDataCollector; + +class OutputHandler +{ +public: + /// @brief - Sets the TensorInfo used by this output handler. + /// @param tensorInfo - TensorInfo for the output. + void SetTensorInfo(const TensorInfo& tensorInfo); + + /// @brief - Creates tensor handlers used by the intermediate tensors. Does not allocate memory. + /// @param factory - Factory to be used for handler creation. + void CreateTensorHandles(const IWorkloadFactory& factory); + + /// @brief - Gets the matching TensorInfo for the output. + /// @return - References to the output TensorInfo. + const TensorInfo& GetTensorInfo() const { return m_TensorInfo; } + + /// @brief - Gets the allocated tensor memory. + /// @return - Pointer to the tensor memory. + ITensorHandle* GetData() const { return m_TensorHandle.get(); } + + /// Fill the outputs for a given queue descriptor. + void CollectWorkloadOutputs(WorkloadDataCollector& dataCollector) const; + + void SetData(std::unique_ptr data) { m_TensorHandle = std::move(data); } + + /// @brief Returns true if SetTensorInfo() has been called at least once on this. + bool IsTensorInfoSet() const { return m_bTensorInfoSet; } +private: + std::unique_ptr m_TensorHandle; + TensorInfo m_TensorInfo; + bool m_bTensorInfoSet = false; +}; + +} //namespace armnn diff --git a/src/backends/RefLayerSupport.cpp b/src/backends/RefLayerSupport.cpp new file mode 100644 index 0000000000..d56cdebeda --- /dev/null +++ b/src/backends/RefLayerSupport.cpp @@ -0,0 +1,398 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "LayerSupportCommon.hpp" +#include "RefLayerSupport.hpp" +#include +#include +#include + +#include +#include "InternalTypes.hpp" + +using namespace boost; + +namespace armnn +{ + +template +bool IsSupportedForDataTypeRef(std::string* reasonIfUnsupported, + DataType dataType, + Float32Func floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... 
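OutputHandler ties the surrounding pieces together for each output slot of a layer: the graph records the TensorInfo first, later asks a workload factory to create the handle, and finally pushes the handle-and-info pair into the queue descriptor being built. A sketch of that call sequence as a consumer might drive it; the include paths are assumptions and the graph machinery that normally does this is elided:

#include "backends/OutputHandler.hpp"
#include "backends/WorkloadDataCollector.hpp"
#include "backends/WorkloadFactory.hpp"

void PrepareOutput(armnn::OutputHandler& handler,
                   const armnn::IWorkloadFactory& factory,
                   const armnn::TensorInfo& info,
                   armnn::WorkloadDataCollector& collector)
{
    handler.SetTensorInfo(info);               // 1. shape and type decided at graph-build time
    handler.CreateTensorHandles(factory);      // 2. backend-specific handle, no memory allocated yet
    handler.CollectWorkloadOutputs(collector); // 3. hand the handle and info to the descriptor
}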
params) +{ + return IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + &FalseFunc, + floatFuncPtr, + uint8FuncPtr, + std::forward(params)...); +} + +bool IsActivationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsAdditionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsBatchNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsConstantSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsConvolution2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + ignore_unused(output); + ignore_unused(weights); + ignore_unused(biases); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + ignore_unused(descriptor); + ignore_unused(weights); + ignore_unused(biases); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsDivisionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsSubtractionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFullyConnectedSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + ignore_unused(descriptor); + ignore_unused(weights); + ignore_unused(biases); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsInputSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + 
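The reference-backend predicates in this file all reduce to one dispatch: pick a function pointer based on the tensor's DataType and call it, optionally writing a reason string on failure. A simplified, self-contained sketch of that shape; the real IsSupportedForDataTypeGeneric also covers Float16 and forwards extra parameters:

#include <string>

enum class ExampleDataType { Float32, QuantisedAsymm8 };

using SupportCheck = bool (*)(std::string* reason);

bool TrueCheck(std::string*) { return true; }

bool FalseCheck(std::string* reason)
{
    if (reason) { *reason = "data type not supported in this example"; }
    return false;
}

// Dispatches to whichever checker was registered for the given data type.
bool IsSupportedForDataTypeExample(std::string* reason, ExampleDataType type,
                                   SupportCheck floatCheck, SupportCheck uint8Check)
{
    switch (type)
    {
        case ExampleDataType::Float32:         return floatCheck(reason);
        case ExampleDataType::QuantisedAsymm8: return uint8Check(reason);
    }
    return false;
}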
return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsMergerSupportedRef(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsMultiplicationSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input1); + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input0.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsOutputSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPermuteSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPooling2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsResizeBilinearSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsSoftmaxSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsSplitterSupportedRef(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedRef(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsReshapeSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFloorSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* 
reasonIfUnsupported) +{ + ignore_unused(output); + return IsSupportedForDataTypeRef(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsLstmSupportedRef(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(outputStateIn); + ignore_unused(cellStateIn); + ignore_unused(scratchBuffer); + ignore_unused(outputStateOut); + ignore_unused(cellStateOut); + ignore_unused(output); + ignore_unused(descriptor); + ignore_unused(inputToForgetWeights); + ignore_unused(inputToCellWeights); + ignore_unused(inputToOutputWeights); + ignore_unused(recurrentToForgetWeights); + ignore_unused(recurrentToCellWeights); + ignore_unused(recurrentToOutputWeights); + ignore_unused(forgetGateBias); + ignore_unused(cellBias); + ignore_unused(outputGateBias); + ignore_unused(inputToInputWeights); + ignore_unused(recurrentToInputWeights); + ignore_unused(cellToInputWeights); + ignore_unused(inputGateBias); + ignore_unused(projectionWeights); + ignore_unused(projectionBias); + ignore_unused(cellToForgetWeights); + ignore_unused(cellToOutputWeights); + return false; +} + +bool IsConvertFp16ToFp32SupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return (IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseInputFuncF32<>, + &FalseFuncU8<>) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + output.GetDataType(), + &FalseOutputFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>)); +} + +bool IsConvertFp32ToFp16SupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return (IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &FalseInputFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &FalseOutputFuncF32<>, + &FalseFuncU8<>)); +} + +bool IsMeanSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return false; +} + +} diff --git a/src/backends/RefLayerSupport.hpp b/src/backends/RefLayerSupport.hpp new file mode 100644 index 0000000000..ff2e7e387f --- /dev/null +++ b/src/backends/RefLayerSupport.hpp @@ -0,0 +1,155 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace armnn +{ + +bool IsActivationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsAdditionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsBatchNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDivisionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsSubtractionSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsL2NormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsLstmSupportedRef(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedRef(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedRef(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedRef(const TensorInfo& input, + const TensorInfo& 
output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedRef(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPooling2dSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedRef(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedRef(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedRef(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp16ToFp32SupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp32ToFp16SupportedRef(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsMeanSupportedRef(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +} diff --git a/src/backends/RefWorkloadFactory.cpp b/src/backends/RefWorkloadFactory.cpp new file mode 100644 index 0000000000..93b4d946c4 --- /dev/null +++ b/src/backends/RefWorkloadFactory.cpp @@ -0,0 +1,249 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include "CpuTensorHandle.hpp" +#include "RefWorkloadFactory.hpp" +#include "RefWorkloads.hpp" +#include "Layer.hpp" +#include "MemCopyWorkload.hpp" +#include "MakeWorkloadHelper.hpp" + +#include + +namespace armnn +{ + +template +std::unique_ptr RefWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor, + const WorkloadInfo& info) const +{ + return armnn::MakeWorkload(descriptor, info); +} + +RefWorkloadFactory::RefWorkloadFactory() +{ +} + +bool RefWorkloadFactory::IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::CpuRef, layer, dataType, outReasonIfUnsupported); +} + +std::unique_ptr RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return std::make_unique(tensorInfo); +} + +std::unique_ptr RefWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (info.m_InputTensorInfos.empty() ) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Input cannot be zero length"); + } + if (info.m_OutputTensorInfos.empty()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateInput: Output cannot be zero length"); + } + + if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateInput: data input and output differ in byte count."); + } + + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (info.m_InputTensorInfos.empty() ) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Input cannot be zero length"); + } + if (info.m_OutputTensorInfos.empty()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: Output cannot be zero length"); + } + if (info.m_InputTensorInfos[0].GetNumBytes() != info.m_OutputTensorInfos[0].GetNumBytes()) + { + throw InvalidArgumentException("RefWorkloadFactory::CreateOutput: data input and output differ in byte count."); + } + + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr 
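RefWorkloadFactory::MakeWorkload leans on the MakeWorkloadHelper pattern: given a queue descriptor and a WorkloadInfo, it instantiates either the Float32 or the Uint8 reference workload depending on the data type recorded in the info. A simplified, self-contained sketch of that selection, using stand-in types rather than the helper's real signature:

#include <memory>

struct ExampleWorkloadInfo { bool isFloat32; };

struct IExampleWorkload
{
    virtual ~IExampleWorkload() = default;
    virtual void Execute() const = 0;
};

struct ExampleFloat32Workload : IExampleWorkload { void Execute() const override {} };
struct ExampleUint8Workload   : IExampleWorkload { void Execute() const override {} };

// Chooses the concrete workload type from the data type carried in the info,
// mirroring what the MakeWorkload helper does for the reference backend.
template <typename F32Workload, typename U8Workload, typename Descriptor>
std::unique_ptr<IExampleWorkload> MakeExampleWorkload(const Descriptor& /*descriptor*/,
                                                      const ExampleWorkloadInfo& info)
{
    if (info.isFloat32)
    {
        return std::make_unique<F32Workload>();
    }
    return std::make_unique<U8Workload>();
}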
RefWorkloadFactory::CreateConvolution2d( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateNormalization( + const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty()) + { + throw InvalidArgumentException("RefWorkloadFactory: CreateMemCopy() expected an input tensor."); + } + return std::make_unique(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateSubtraction( + const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreateMean( + 
const MeanQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr RefWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + + +} // namespace armnn diff --git a/src/backends/RefWorkloadFactory.hpp b/src/backends/RefWorkloadFactory.hpp new file mode 100644 index 0000000000..6b13377167 --- /dev/null +++ b/src/backends/RefWorkloadFactory.hpp @@ -0,0 +1,145 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "WorkloadFactory.hpp" +#include "OutputHandler.hpp" + +#include +#include + +namespace armnn +{ + +template +constexpr bool IsOperationQueueDescriptor(const QueueDescriptorType&) { return true; } + +template <> +constexpr bool IsOperationQueueDescriptor(const MemCopyQueueDescriptor&) { return false; } + +template <> +constexpr bool IsOperationQueueDescriptor(const ConstantQueueDescriptor&) { return false; } + +template <> +constexpr bool IsOperationQueueDescriptor(const PermuteQueueDescriptor&) { return false; } + +// Reference workload factory. +class RefWorkloadFactory : public IWorkloadFactory +{ +public: + explicit RefWorkloadFactory(); + virtual ~RefWorkloadFactory() {} + + virtual Compute GetCompute() const override { return Compute::CpuRef; } + + static bool IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const override { return false; } + + virtual std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override + { + boost::ignore_unused(parent, subTensorShape, subTensorOrigin); + return nullptr; + } + + virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + 
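+    // Note: each Create* overload in this factory forwards to the private MakeWorkload
+    // helper declared at the bottom of this class, which instantiates either the Float32
+    // or the Uint8 reference workload depending on the data type recorded in the
+    // WorkloadInfo. A rough sketch of the helper's shape (template parameter names here
+    // are illustrative, not taken from this patch):
+    //
+    //   template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
+    //   std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor,
+    //                                           const WorkloadInfo& info) const;
+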
virtual std::unique_ptr CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& Info) const override; + + virtual std::unique_ptr CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; +private: + + template + std::unique_ptr MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info) const; + +}; + +} // namespace armnn diff --git a/src/backends/RefWorkloads.hpp b/src/backends/RefWorkloads.hpp new file mode 100644 index 0000000000..746a59e71f --- /dev/null +++ b/src/backends/RefWorkloads.hpp @@ -0,0 +1,53 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/RefWorkloads/RefConstantUint8Workload.hpp" +#include "backends/RefWorkloads/ArithmeticFunction.hpp" +#include "backends/RefWorkloads/RefArithmeticWorkload.hpp" +#include "backends/RefWorkloads/ConvImpl.hpp" +#include "backends/RefWorkloads/RefBaseConstantWorkload.hpp" +#include "backends/RefWorkloads/RefConvolution2dUint8Workload.hpp" +#include "backends/RefWorkloads/RefSplitterUint8Workload.hpp" +#include "backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp" +#include "backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp" +#include "backends/RefWorkloads/RefActivationUint8Workload.hpp" +#include "backends/RefWorkloads/RefPooling2dFloat32Workload.hpp" +#include "backends/RefWorkloads/RefWorkloadUtils.hpp" +#include "backends/RefWorkloads/RefMergerUint8Workload.hpp" +#include "backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp" +#include "backends/RefWorkloads/Softmax.hpp" +#include "backends/RefWorkloads/RefMergerFloat32Workload.hpp" +#include "backends/RefWorkloads/TensorBufferArrayView.hpp" +#include "backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp" +#include "backends/RefWorkloads/Splitter.hpp" +#include "backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp" +#include "backends/RefWorkloads/RefReshapeFloat32Workload.hpp" +#include "backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp" +#include "backends/RefWorkloads/FullyConnected.hpp" +#include "backends/RefWorkloads/RefFloorFloat32Workload.hpp" +#include "backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp" +#include "backends/RefWorkloads/RefSoftmaxUint8Workload.hpp" +#include "backends/RefWorkloads/RefReshapeUint8Workload.hpp" +#include "backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp" +#include "backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp" +#include "backends/RefWorkloads/ResizeBilinear.hpp" +#include "backends/RefWorkloads/RefNormalizationFloat32Workload.hpp" +#include "backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp" +#include "backends/RefWorkloads/RefPooling2dUint8Workload.hpp" +#include "backends/RefWorkloads/BatchNormImpl.hpp" +#include "backends/RefWorkloads/Activation.hpp" +#include "backends/RefWorkloads/Merger.hpp" +#include "backends/RefWorkloads/RefSplitterFloat32Workload.hpp" +#include "backends/RefWorkloads/RefConstantFloat32Workload.hpp" +#include "backends/RefWorkloads/RefActivationFloat32Workload.hpp" +#include "backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp" +#include "backends/RefWorkloads/Pooling2d.hpp" +#include "backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp" +#include "backends/RefWorkloads/RefPermuteWorkload.hpp" +#include "backends/RefWorkloads/RefLstmFloat32Workload.hpp" +#include "backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp" +#include "backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp" diff --git a/src/backends/RefWorkloads/Activation.cpp b/src/backends/RefWorkloads/Activation.cpp new file mode 100644 index 0000000000..ef4903074b --- /dev/null +++ b/src/backends/RefWorkloads/Activation.cpp @@ -0,0 +1,91 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Activation.hpp" + +#include + +#include + +namespace armnn +{ + +void Activation(const float* in, + float* out, + const TensorInfo& tensorInfo, + ActivationFunction function, + float a, + float b) +{ + for (size_t i = 0; i 0.0f ? 
input : (input * a); + break; + } + case ActivationFunction::Abs: + { + output = input < 0 ? -input : input; + break; + } + case ActivationFunction::Sqrt: + { + output = sqrtf(input); + break; + } + case ActivationFunction::Square: + { + output = input * input; + break; + } + case ActivationFunction::TanH: + { + output = a * tanhf(b * input); + break; + } + default: + { + BOOST_LOG_TRIVIAL(error) << "Unsupported activation function"; + return; + } + } + + out[i] = output; + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/Activation.hpp b/src/backends/RefWorkloads/Activation.hpp new file mode 100644 index 0000000000..c8a23114f0 --- /dev/null +++ b/src/backends/RefWorkloads/Activation.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +namespace armnn +{ + +/// Performs the ActivationFunction elementwise on the inputs to give the outputs. +void Activation(const float* in, + float* out, + const TensorInfo& tensorInfo, + ActivationFunction function, + float a, + float b); + +} //namespace armnn diff --git a/src/backends/RefWorkloads/ArithmeticFunction.cpp b/src/backends/RefWorkloads/ArithmeticFunction.cpp new file mode 100644 index 0000000000..fede138253 --- /dev/null +++ b/src/backends/RefWorkloads/ArithmeticFunction.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ArithmeticFunction.hpp" +#include "Broadcast.hpp" +#include + +namespace armnn +{ + +template +ArithmeticFunction::ArithmeticFunction(const TensorShape& inShape0, + const TensorShape& inShape1, + const TensorShape& outShape, + const float* inData0, + const float* inData1, + float* outData) +{ + BroadcastLoop(inShape0, inShape1, outShape).Unroll(Functor(), 0, inData0, inData1, outData); +} + +} //namespace armnn + +template struct armnn::ArithmeticFunction>; +template struct armnn::ArithmeticFunction>; +template struct armnn::ArithmeticFunction>; +template struct armnn::ArithmeticFunction>; diff --git a/src/backends/RefWorkloads/ArithmeticFunction.hpp b/src/backends/RefWorkloads/ArithmeticFunction.hpp new file mode 100644 index 0000000000..eafb6444f6 --- /dev/null +++ b/src/backends/RefWorkloads/ArithmeticFunction.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +template +struct ArithmeticFunction +{ + ArithmeticFunction(const TensorShape& inShape0, + const TensorShape& inShape1, + const TensorShape& outShape, + const float* inData0, + const float* inData1, + float* outData); +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/BatchNormImpl.hpp b/src/backends/RefWorkloads/BatchNormImpl.hpp new file mode 100644 index 0000000000..a7579c8373 --- /dev/null +++ b/src/backends/RefWorkloads/BatchNormImpl.hpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include + +#include + +namespace armnn +{ + +template +static void BatchNormImpl(NormData data, + const float* varIn, + const float* meanIn, + const float* gammaIn, + const float* betaIn, + float * outputData, + const float * inputData) +{ + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + for (unsigned int c = 0; c < inputInfo0.GetShape()[1]; c++) + { + float var = varIn[c]; + float mean = meanIn[c]; + float gamma = gammaIn[c]; + float beta = betaIn[c]; + + float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps); + float add = beta - mult * mean; + + for (unsigned int n = 0; n < inputInfo0.GetShape()[0]; n++) + { + for (unsigned int j = 0; j < inputInfo0.GetShape()[2]; j++) + { + for (unsigned int i = 0; i < inputInfo0.GetShape()[3]; i++) + { + unsigned int index = i + + j*inputInfo0.GetShape()[3] + + c*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] + + n*inputInfo0.GetShape()[3] * inputInfo0.GetShape()[2] + * inputInfo0.GetShape()[1]; + + outputData[index] = mult * inputData[index] + add; + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/Broadcast.cpp b/src/backends/RefWorkloads/Broadcast.cpp new file mode 100644 index 0000000000..8421a0a7ed --- /dev/null +++ b/src/backends/RefWorkloads/Broadcast.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Broadcast.hpp" + +namespace armnn +{ + +BroadcastLoop::BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape) +: m_DimData(outShape.GetNumDimensions()) +{ + const unsigned int numDims = GetNumDimensions(); + + unsigned int sIn0 = 1; + unsigned int sIn1 = 1; + unsigned int sOut = 1; + + for (unsigned int j = numDims - 1, k = 0; k < numDims ; k++, j--) + { + m_DimData[j].m_DimSize = outShape[j]; + m_DimData[j].m_Stride1 = (inShape0[j] > 1) ? sIn0 : 0; + m_DimData[j].m_Stride2 = (inShape1[j] > 1) ? sIn1 : 0; + m_DimData[j].m_StrideOut = sOut; + + sIn0 *= inShape0[j]; + sIn1 *= inShape1[j]; + sOut *= outShape[j]; + } +} + +} // namespace armnn diff --git a/src/backends/RefWorkloads/Broadcast.hpp b/src/backends/RefWorkloads/Broadcast.hpp new file mode 100644 index 0000000000..e92ed0598d --- /dev/null +++ b/src/backends/RefWorkloads/Broadcast.hpp @@ -0,0 +1,58 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include + +namespace armnn +{ + +struct BroadcastLoop +{ + BroadcastLoop(const TensorShape& inShape0, const TensorShape& inShape1, const TensorShape& outShape); + + unsigned int GetNumDimensions() + { + return static_cast(m_DimData.size()); + } + + template + void Unroll(Func operationFunc, + unsigned int dimension, + const T0* inData0, + const T1* inData1, + U* outData) + { + if (dimension >= GetNumDimensions()) + { + *outData = operationFunc(*inData0, *inData1); + return; + } + + for (unsigned int i = 0; i < m_DimData[dimension].m_DimSize; i++) + { + Unroll(operationFunc, dimension + 1, inData0, inData1, outData); + + inData0 += m_DimData[dimension].m_Stride1; + inData1 += m_DimData[dimension].m_Stride2; + outData += m_DimData[dimension].m_StrideOut; + } + } + +private: + // Struct to hold the dimension data. 
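+    // For each output dimension, m_DimSize is the extent to iterate, m_StrideOut is the
+    // element stride in the output, and m_Stride1/m_Stride2 are the strides in the two
+    // inputs. An input stride is zeroed when that input has extent 1 in the dimension,
+    // so the same input element is reused across it - which is the broadcast behaviour.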
+ struct BroadcastDimensionData + { + unsigned int m_DimSize; + unsigned int m_StrideOut; + unsigned int m_Stride1; + unsigned int m_Stride2; + }; + + std::vector m_DimData; +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/RefWorkloads/CMakeLists.txt b/src/backends/RefWorkloads/CMakeLists.txt new file mode 100644 index 0000000000..0f4bfe129f --- /dev/null +++ b/src/backends/RefWorkloads/CMakeLists.txt @@ -0,0 +1,101 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +list(APPEND armnnRefBackend_sources + Activation.cpp + Activation.hpp + ArithmeticFunction.cpp + ArithmeticFunction.hpp + BatchNormImpl.hpp + Broadcast.cpp + Broadcast.hpp + ConvImpl.cpp + ConvImpl.hpp + FullyConnected.cpp + FullyConnected.hpp + Merger.hpp + Pooling2d.cpp + Pooling2d.hpp + RefActivationFloat32Workload.cpp + RefActivationFloat32Workload.hpp + RefActivationUint8Workload.cpp + RefActivationUint8Workload.hpp + RefArithmeticWorkload.cpp + RefArithmeticWorkload.hpp + RefBaseConstantWorkload.cpp + RefBaseConstantWorkload.hpp + RefBatchNormalizationFloat32Workload.cpp + RefBatchNormalizationFloat32Workload.hpp + RefBatchNormalizationUint8Workload.cpp + RefBatchNormalizationUint8Workload.hpp + RefConstantFloat32Workload.cpp + RefConstantFloat32Workload.hpp + RefConstantUint8Workload.cpp + RefConstantUint8Workload.hpp + RefConvertFp16ToFp32Workload.cpp + RefConvertFp16ToFp32Workload.hpp + RefConvertFp32ToFp16Workload.cpp + RefConvertFp32ToFp16Workload.hpp + RefConvolution2dFloat32Workload.cpp + RefConvolution2dFloat32Workload.hpp + RefConvolution2dUint8Workload.cpp + RefConvolution2dUint8Workload.hpp + RefDepthwiseConvolution2dFloat32Workload.cpp + RefDepthwiseConvolution2dFloat32Workload.hpp + RefDepthwiseConvolution2dUint8Workload.cpp + RefDepthwiseConvolution2dUint8Workload.hpp + RefFakeQuantizationFloat32Workload.cpp + RefFakeQuantizationFloat32Workload.hpp + RefFloorFloat32Workload.cpp + RefFloorFloat32Workload.hpp + RefFullyConnectedFloat32Workload.cpp + RefFullyConnectedFloat32Workload.hpp + RefFullyConnectedUint8Workload.cpp + RefFullyConnectedUint8Workload.hpp + RefL2NormalizationFloat32Workload.cpp + RefL2NormalizationFloat32Workload.hpp + RefLstmFloat32Workload.cpp + RefLstmFloat32Workload.hpp + RefMergerFloat32Workload.cpp + RefMergerFloat32Workload.hpp + RefMergerUint8Workload.cpp + RefMergerUint8Workload.hpp + RefNormalizationFloat32Workload.cpp + RefNormalizationFloat32Workload.hpp + RefPermuteWorkload.cpp + RefPermuteWorkload.hpp + RefPooling2dFloat32Workload.cpp + RefPooling2dFloat32Workload.hpp + RefPooling2dUint8Workload.cpp + RefPooling2dUint8Workload.hpp + RefReshapeFloat32Workload.cpp + RefReshapeFloat32Workload.hpp + RefReshapeUint8Workload.cpp + RefReshapeUint8Workload.hpp + RefResizeBilinearFloat32Workload.cpp + RefResizeBilinearFloat32Workload.hpp + RefResizeBilinearUint8Workload.cpp + RefResizeBilinearUint8Workload.hpp + RefSoftmaxFloat32Workload.cpp + RefSoftmaxFloat32Workload.hpp + RefSoftmaxUint8Workload.cpp + RefSoftmaxUint8Workload.hpp + RefSplitterFloat32Workload.cpp + RefSplitterFloat32Workload.hpp + RefSplitterUint8Workload.cpp + RefSplitterUint8Workload.hpp + RefWorkloadUtils.hpp + ResizeBilinear.cpp + ResizeBilinear.hpp + Softmax.cpp + Softmax.hpp + Splitter.hpp + TensorBufferArrayView.hpp +) + +add_library(armnnRefBackend STATIC ${armnnRefBackend_sources}) +target_include_directories(armnnRefBackend PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnRefBackend PRIVATE 
${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnRefBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/RefWorkloads/ConvImpl.cpp b/src/backends/RefWorkloads/ConvImpl.cpp new file mode 100644 index 0000000000..8743a2bd0d --- /dev/null +++ b/src/backends/RefWorkloads/ConvImpl.cpp @@ -0,0 +1,71 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ConvImpl.hpp" + +#include + +#include +#include + +namespace armnn +{ + +QuantizedMultiplierSmallerThanOne::QuantizedMultiplierSmallerThanOne(float multiplier) +{ + BOOST_ASSERT(multiplier >= 0.0f && multiplier < 1.0f); + if (multiplier == 0.0f) + { + m_Multiplier = 0; + m_RightShift = 0; + } + else + { + const double q = std::frexp(multiplier, &m_RightShift); + m_RightShift = -m_RightShift; + int64_t qFixed = static_cast(std::round(q * (1ll << 31))); + BOOST_ASSERT(qFixed <= (1ll << 31)); + if (qFixed == (1ll << 31)) + { + qFixed /= 2; + --m_RightShift; + } + BOOST_ASSERT(m_RightShift >= 0); + BOOST_ASSERT(qFixed <= std::numeric_limits::max()); + m_Multiplier = static_cast(qFixed); + } +} + +int32_t QuantizedMultiplierSmallerThanOne::operator*(int32_t rhs) const +{ + int32_t x = SaturatingRoundingDoublingHighMul(rhs, m_Multiplier); + return RoundingDivideByPOT(x, m_RightShift); +} + +int32_t QuantizedMultiplierSmallerThanOne::SaturatingRoundingDoublingHighMul(int32_t a, int32_t b) +{ + // Check for overflow. + if (a == b && a == std::numeric_limits::min()) + { + return std::numeric_limits::max(); + } + int64_t a_64(a); + int64_t b_64(b); + int64_t ab_64 = a_64 * b_64; + int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); + int32_t ab_x2_high32 = static_cast((ab_64 + nudge) / (1ll << 31)); + return ab_x2_high32; +} + +int32_t QuantizedMultiplierSmallerThanOne::RoundingDivideByPOT(int32_t x, int exponent) +{ + BOOST_ASSERT(exponent >= 0 && exponent <= 31); + int32_t mask = (1 << exponent) - 1; + int32_t remainder = x & mask; + int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0); + return (x >> exponent) + (remainder > threshold ? 1 : 0); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/ConvImpl.hpp b/src/backends/RefWorkloads/ConvImpl.hpp new file mode 100644 index 0000000000..4c9ab2a644 --- /dev/null +++ b/src/backends/RefWorkloads/ConvImpl.hpp @@ -0,0 +1,187 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include + +#include +#include + +#include +#include + +namespace armnn +{ + +/// Performs multiplication of an integer with a multiplier which is less than one, +/// using quantized integer arithmetic which is consistent with AndroidNN's CPU executor. +struct QuantizedMultiplierSmallerThanOne +{ +public: + /// Constructs a QuantizedMultiplierSmallerThanOne which will multiply by the given multiplier. + /// This stores the appropriate integer quantities (derived from the given multiplier) for later use. + /// The implementation of this function is adapted from Android NN's QuantizeMultiplierSmallerThanOne(). + QuantizedMultiplierSmallerThanOne(float multiplier); + + /// The implementation of this function is adapted from Android NN's MultiplyByQuantizedMultiplierSmallerThanOne(). + int32_t operator*(int32_t rhs) const; + +private: + /// The implementation of this function is adapted from gemmlowp's SaturatingRoundingDoublingHighMul(). 
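+    /// Returns the high 32 bits of the rounded product 2*a*b, saturating the single
+    /// overflow case (a == b == INT32_MIN) to INT32_MAX.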
+ static int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b); + + /// The implementation of this function is adapted from gemmlowp's RoundingDivideByPOT(). + static int32_t RoundingDivideByPOT(int32_t x, int exponent); + + int32_t m_Multiplier; + int32_t m_RightShift; +}; + +/// An implementation shared by normal and depthwise convolution. +template +static void ConvImpl(ConvData data, + const InputType* inputData, + float inputScale, + int32_t inputOffset, + const InputType* filterData, + float filterScale, + int32_t filterOffset, + const BiasType* biasData, + InputType* outputData, + float outputScale, + int32_t outputOffset, + const TensorInfo& filterInfo, + bool depthwise = false) +{ + if (data.m_Parameters.m_BiasEnabled && !biasData) + { + throw InvalidArgumentException("Bias is enabled but the bias data is invalid"); + } + + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); + + unsigned int depthMult = depthwise ? filterInfo.GetShape()[0] : 1; + unsigned int channelsInput = filterInfo.GetShape()[1]; + unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0]; + + unsigned int batchSize = outputInfo0.GetShape()[0]; + unsigned int heightOutput = outputInfo0.GetShape()[2]; + unsigned int widthOutput = outputInfo0.GetShape()[3]; + unsigned int heightInput = inputInfo0.GetShape()[2]; + unsigned int widthInput = inputInfo0.GetShape()[3]; + + unsigned int heightFilter = filterInfo.GetShape()[2]; + unsigned int widthFilter = filterInfo.GetShape()[3]; + + unsigned int paddingTop = data.m_Parameters.m_PadTop; + unsigned int paddingLeft = data.m_Parameters.m_PadLeft; + unsigned int hStride = data.m_Parameters.m_StrideY; + unsigned int xStride = data.m_Parameters.m_StrideX; + + // The world's least efficient convolution. + for (unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++) + { + for (unsigned int cOutput = 0; cOutput < channelsOutput; cOutput++) + { + for (unsigned int yOutput = 0; yOutput < heightOutput; yOutput++) + { + for (unsigned int xOutput = 0; xOutput < widthOutput; xOutput++) + { + // This loop goes over each output element. + AccumulatorType sum = AccumulatorType(); + + // For depthwise, each output channel corresponds to exactly one input channel. + // For normal, must loop over each input channel. + for (unsigned int cInput = 0; cInput < (depthwise ? 1 : channelsInput); cInput++) + { + unsigned int depthwiseMultiplierIdx = 0; + if (depthwise) + { + cInput = cOutput / depthMult; + depthwiseMultiplierIdx = cOutput % depthMult; + } + + for (unsigned int yFilter = 0; yFilter < heightFilter; yFilter++) + { + for (unsigned int xFilter = 0; xFilter < widthFilter; xFilter++) + { + // This loop goes over each input element for each output element. + + unsigned int filterIndex; + + // Since dimensionality of kernel depends on depthwiseness, so does index. 
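+                            // Depthwise filters are indexed as [depthMultiplier, channelsInput, height, width],
+                            // regular filters as [channelsOutput, channelsInput, height, width], which gives the
+                            // two index computations below.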
+ if (depthwise) + { + filterIndex = depthwiseMultiplierIdx * widthFilter * heightFilter * channelsInput + + cInput * widthFilter * heightFilter + + yFilter * widthFilter + + xFilter; + } + else + { + filterIndex = cOutput * widthFilter * heightFilter * channelsInput + + cInput * widthFilter * heightFilter + + yFilter * widthFilter + + xFilter; + } + AccumulatorType filterValue = filterData[filterIndex] - + boost::numeric_cast(filterOffset); + + unsigned int yInput = yOutput * hStride + yFilter; + unsigned int xInput = xOutput * xStride + xFilter; + + AccumulatorType inputValue; + + // Check if we're in the padding. + if (yInput < paddingTop || yInput >= heightInput + paddingTop || + xInput < paddingLeft || xInput >= widthInput + paddingLeft ) + { + inputValue = AccumulatorType(); + } + else + { + inputValue = inputData[batchIdx * widthInput * heightInput * channelsInput + + widthInput * heightInput * cInput + + widthInput * (yInput - paddingTop) + + xInput - paddingLeft] - + boost::numeric_cast(inputOffset); + } + sum += filterValue * inputValue; + } + } + } + + if (data.m_Parameters.m_BiasEnabled) + { + sum += biasData[cOutput]; + } + + if (outputScale != 0.0f) + { + float multiplier = (inputScale * filterScale) / outputScale; + // Apply the multiplier to sum, but do so using some quantized arithmetic which is consistent + // with the AndroidNN CPU implementation. This should be (roughly) equivalent to: + // sum = std::round(multiplier * sum + outputOffset); + sum = boost::numeric_cast( + QuantizedMultiplierSmallerThanOne(multiplier) * boost::numeric_cast(sum)) + + boost::numeric_cast(outputOffset); + sum = std::min(std::max(sum, 0), 255); + } + + outputData[batchIdx * widthOutput * heightOutput * channelsOutput + + widthOutput * heightOutput * cOutput + + widthOutput * yOutput + + xOutput] = boost::numeric_cast(sum); + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/FullyConnected.cpp b/src/backends/RefWorkloads/FullyConnected.cpp new file mode 100644 index 0000000000..bf5814d2ad --- /dev/null +++ b/src/backends/RefWorkloads/FullyConnected.cpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "FullyConnected.hpp" + +#include + +namespace armnn +{ + +void FullyConnected(const float* inputData, + float* outputData, + const TensorInfo& inputTensorInfo, + const TensorInfo& outputTensorInfo, + const float* weightData, + const float* biasData, + bool transposeWeights) +{ + unsigned int N = outputTensorInfo.GetShape()[1]; // Outputs Vector Size. + + BOOST_ASSERT(inputTensorInfo.GetNumDimensions() > 1); // Needs some data. + + unsigned int K = 1; // Total number of activations in the input. 
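+    // K is the product of all input dimensions except the batch dimension, i.e. the
+    // flattened per-sample input length. The weights are then read as an N x K matrix
+    // when transposeWeights is set, and as a K x N matrix otherwise.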
+ for (unsigned int i = 1; i < inputTensorInfo.GetNumDimensions(); i++) + { + K *= inputTensorInfo.GetShape()[i]; + } + + for (unsigned int n = 0; n < inputTensorInfo.GetShape()[0]; n++) + { + for (unsigned int channelOutput = 0; channelOutput < N; channelOutput++) + { + float outval = 0.f; + + for (unsigned int channelInput = 0; channelInput < K; channelInput++) + { + float weight; + if (transposeWeights) + { + weight = weightData[channelOutput * K + channelInput]; + } + else + { + weight = weightData[channelInput * N + channelOutput]; + } + + outval += weight * inputData[n * K + channelInput]; + } + + if (biasData) + { + outval += biasData[channelOutput]; + } + + outputData[n * N + channelOutput] = outval; + } + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/FullyConnected.hpp b/src/backends/RefWorkloads/FullyConnected.hpp new file mode 100644 index 0000000000..623259f8f8 --- /dev/null +++ b/src/backends/RefWorkloads/FullyConnected.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +/// Performs a matrix multiplication and optionally adds a bias. +void FullyConnected(const float* inputData, + float* outputData, + const TensorInfo& inputTensorInfo, + const TensorInfo& outputTensorInfo, + const float* weightData, + const float* biasData, + bool transposeWeights); + +} //namespace armnn diff --git a/src/backends/RefWorkloads/Merger.hpp b/src/backends/RefWorkloads/Merger.hpp new file mode 100644 index 0000000000..867925faa2 --- /dev/null +++ b/src/backends/RefWorkloads/Merger.hpp @@ -0,0 +1,82 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include "backends/WorkloadData.hpp" + +#include + +namespace armnn +{ + +template +void Merger(const MergerQueueDescriptor& data) +{ + const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]); + + for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index) + { + unsigned int indices[MaxNumOfTensorDimensions] = { 0 }; + + unsigned int indexRemainder = index; + unsigned int dimensionStride = outputInfo0.GetNumElements(); + + for (unsigned int i=0; i= view.m_Origin[i] + inputInfo.GetShape()[i]) + { + insideView = false; + } + } + + if (insideView) + { + unsigned int inIndex = 0; + unsigned int dimensionStride = 1; + + for (unsigned int i = inputInfo.GetNumDimensions(); i-- > 0;) + { + inIndex += dimensionStride * (indices[i] - view.m_Origin[i]); + dimensionStride *= inputInfo.GetShape()[i]; + } + + //We are within the view, copy input data to the output corresponding to this view. + (GetOutputTensorData(0, data))[index] = + (GetInputTensorData(viewIdx, data))[inIndex]; + + //What should we do if input views overlap on the output tensor? + //We could error, take the average, or shm else... + //For now just stop after finding first view (input) that matches. + break; + } + } + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/Pooling2d.cpp b/src/backends/RefWorkloads/Pooling2d.cpp new file mode 100644 index 0000000000..5812a290e7 --- /dev/null +++ b/src/backends/RefWorkloads/Pooling2d.cpp @@ -0,0 +1,241 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "Pooling2d.hpp" + +#include +#include + +#include + +#include +#include +#include + +namespace +{ + using PoolingAlgorithm = armnn::PoolingAlgorithm; + + float DefaultInitializer(PoolingAlgorithm algorithm) + { + switch (algorithm) + { + case PoolingAlgorithm::Max: + { + return std::numeric_limits::lowest(); + } + case PoolingAlgorithm::Average: + case PoolingAlgorithm::L2: + { + return 0.0f; + } + default: + { + throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); + } + } + } + + using Accumulator = std::function; + + Accumulator GetAccumulator(PoolingAlgorithm algorithm) + { + switch (algorithm) + { + case PoolingAlgorithm::Max: + { + return [](float & accu, float value) { + if (value > accu) { + accu = value; + } + }; + } + + case PoolingAlgorithm::Average: + { + return [](float & accu, float value) { + accu += value; + }; + } + + case PoolingAlgorithm::L2: + { + return [](float & accu, float value) { + accu += (value*value); + }; + } + + default: + { + throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); + } + } + } + + using Executor = std::function; + + Executor GetExecutor(PoolingAlgorithm algorithm) + { + switch (algorithm) + { + case PoolingAlgorithm::Max: + { + return [](float & accumulated, float kernelSize) {}; + } + + case PoolingAlgorithm::Average: + { + return [](float & accumulated, float kernelSize) { + accumulated /= kernelSize; + }; + } + + case PoolingAlgorithm::L2: + { + return [](float & accumulated, float kernelSize) { + accumulated = sqrtf(accumulated / kernelSize); + }; + } + + default: + { + throw armnn::InvalidArgumentException("Unsupported pooling algorithm"); + } + } + } + + bool OnPaddingOnly(int start, int end, int maxRange, int padding) + { + if (end <= 0 || start > (maxRange - padding)) + { + return true; + } + else + { + return false; + } + } + + + bool ClampRange(int & start, int & end, int maxRange) + { + if (start < 0 || end > maxRange) + { + start = std::min(std::max(start, 0), maxRange); + end = std::min(std::max(end, 0), maxRange); + return true; + } + else + { + return false; + } + } +} + +namespace armnn +{ + +void Pooling2d(const float* in, + float* out, + const TensorInfo& inputInfo, + const TensorInfo& outputInfo, + const Pooling2dDescriptor& params) +{ + const int batchSize = boost::numeric_cast(outputInfo.GetShape()[0]); + const int channels = boost::numeric_cast(outputInfo.GetShape()[1]); + const int heightOutput = boost::numeric_cast(outputInfo.GetShape()[2]); + const int widthOutput = boost::numeric_cast(outputInfo.GetShape()[3]); + const int heightInput = boost::numeric_cast(inputInfo.GetShape()[2]); + const int widthInput = boost::numeric_cast(inputInfo.GetShape()[3]); + const int padLeft = boost::numeric_cast(params.m_PadLeft); + const int padRight = boost::numeric_cast(params.m_PadRight); + const int padTop = boost::numeric_cast(params.m_PadTop); + const int padBottom = boost::numeric_cast(params.m_PadBottom); + const int strideX = boost::numeric_cast(params.m_StrideX); + const int strideY = boost::numeric_cast(params.m_StrideY); + const int poolHeight = boost::numeric_cast(params.m_PoolHeight); + const int poolWidth = boost::numeric_cast(params.m_PoolWidth); + + float defaultInitializer = DefaultInitializer(params.m_PoolType); + + Accumulator accumulate = GetAccumulator(params.m_PoolType); + Executor execute = GetExecutor(params.m_PoolType); + + // Check supported padding methods outside the loop to simplify + // the inner loop. 
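+    // With IgnoreValue the divisor stays at the full (padding-inclusive) window size, so
+    // padded positions effectively count as zeros; with Exclude the window is clamped to
+    // the real input and the divisor is recomputed from the clamped extents (see the
+    // ClampRange handling below).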
+ if (params.m_PaddingMethod != PaddingMethod::Exclude && + params.m_PaddingMethod != PaddingMethod::IgnoreValue) + { + throw armnn::InvalidArgumentException("Unsupported padding type"); + } + + for (int n = 0; n < batchSize; n++) + { + for (int c = 0; c < channels; c++) + { + for (int yOutput = 0; yOutput < heightOutput; yOutput++) + { + for (int xOutput = 0; xOutput < widthOutput; xOutput++) + { + int hstart = (yOutput * strideY) - padTop; + int wstart = (xOutput * strideX) - padLeft; + int hend = hstart + poolHeight; + int wend = wstart + poolWidth; + + // Clamp the pooling region inside the valid input area (which includes the padding). + // This is necessary because the final pooling in a row may overlap beyond the padding. + hend = std::min(hend, heightInput + padBottom); + wend = std::min(wend, widthInput + padRight); + + float result = defaultInitializer; + float poolAreaSize = boost::numeric_cast((hend - hstart) * (wend - wstart)); + + // Special case: when the pooling kernel is over a padding region and the padding + // size is larger or equal to the kernel and the kernel only covers + // padding and no real values, then we initialize the result as zero + // by convention. This is because we need to choose a value here and + // all values we have are padding, which we ignore. + if (OnPaddingOnly(hstart, hend, heightInput, padBottom) || + OnPaddingOnly(wstart, wend, widthInput, padRight)) + { + result = 0.0f; + } + + bool clamped = ClampRange(wstart, wend, widthInput); + clamped |= ClampRange(hstart, hend, heightInput); + + if (clamped && params.m_PaddingMethod == PaddingMethod::Exclude) + { + // When we exclude the padding, it means we calculate with a smaller + // kernel size, so I changed the divisor here. + poolAreaSize = boost::numeric_cast((hend - hstart) * (wend - wstart)); + } + + for (auto yInput = hstart; yInput < hend; yInput++) + { + for (auto xInput = wstart; xInput < wend; xInput++) + { + float inval = in[n * widthInput * heightInput * channels + + c * widthInput * heightInput + + yInput * widthInput + + xInput]; + + accumulate(result, inval); + } + } + + execute(result, poolAreaSize); + + out[n * widthOutput * heightOutput * channels + + c * widthOutput * heightOutput + + yOutput * widthOutput + + xOutput] = result; + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/Pooling2d.hpp b/src/backends/RefWorkloads/Pooling2d.hpp new file mode 100644 index 0000000000..da56b25c4e --- /dev/null +++ b/src/backends/RefWorkloads/Pooling2d.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +/// Computes the Pooling2d operation. +void Pooling2d(const float* in, + float* out, + const TensorInfo& inputInfo, + const TensorInfo& outputInfo, + const Pooling2dDescriptor& params); + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefActivationFloat32Workload.cpp b/src/backends/RefWorkloads/RefActivationFloat32Workload.cpp new file mode 100644 index 0000000000..3cc59be7a4 --- /dev/null +++ b/src/backends/RefWorkloads/RefActivationFloat32Workload.cpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefActivationFloat32Workload.hpp" + +#include "Activation.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefActivationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationFloat32Workload_Execute"); + + Activation(GetInputTensorDataFloat(0, m_Data), + GetOutputTensorDataFloat(0, m_Data), + GetTensorInfo(m_Data.m_Inputs[0]), + m_Data.m_Parameters.m_Function, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefActivationFloat32Workload.hpp b/src/backends/RefWorkloads/RefActivationFloat32Workload.hpp new file mode 100644 index 0000000000..0de33f02ff --- /dev/null +++ b/src/backends/RefWorkloads/RefActivationFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +namespace armnn +{ + +class RefActivationFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefActivationUint8Workload.cpp b/src/backends/RefWorkloads/RefActivationUint8Workload.cpp new file mode 100644 index 0000000000..b95c2e22a8 --- /dev/null +++ b/src/backends/RefWorkloads/RefActivationUint8Workload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefActivationUint8Workload.hpp" + +#include "Activation.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefActivationUint8Workload_Execute"); + + const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo); + + std::vector results(tensorInfo.GetNumElements()); + + Activation(dequant.data(), + results.data(), + tensorInfo, + m_Data.m_Parameters.m_Function, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0])); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefActivationUint8Workload.hpp b/src/backends/RefWorkloads/RefActivationUint8Workload.hpp new file mode 100644 index 0000000000..f38888a9f7 --- /dev/null +++ b/src/backends/RefWorkloads/RefActivationUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefActivationUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefArithmeticWorkload.cpp b/src/backends/RefWorkloads/RefArithmeticWorkload.cpp new file mode 100644 index 0000000000..6c39fa1186 --- /dev/null +++ b/src/backends/RefWorkloads/RefArithmeticWorkload.cpp @@ -0,0 +1,69 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefArithmeticWorkload.hpp" +#include "ArithmeticFunction.hpp" +#include "RefWorkloadUtils.hpp" +#include "Profiling.hpp" +#include + +namespace armnn +{ + +template +void BaseFloat32ArithmeticWorkload::ExecuteImpl(const char * debugString) const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString); + + auto data = Float32Workload::GetData(); + const TensorShape& inShape0 = GetTensorInfo(data.m_Inputs[0]).GetShape(); + const TensorShape& inShape1 = GetTensorInfo(data.m_Inputs[1]).GetShape(); + const TensorShape& outShape = GetTensorInfo(data.m_Outputs[0]).GetShape(); + + const float* inData0 = GetInputTensorDataFloat(0, data); + const float* inData1 = GetInputTensorDataFloat(1, data); + float* outData = GetOutputTensorDataFloat(0, data); + + ArithmeticFunction(inShape0, inShape1, outShape, inData0, inData1, outData); +} + +template +void BaseUint8ArithmeticWorkload::ExecuteImpl(const char * debugString) const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString); + + auto data = Uint8Workload::GetData(); + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + const TensorInfo& inputInfo1 = GetTensorInfo(data.m_Inputs[1]); + const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); + + auto dequant0 = Dequantize(GetInputTensorDataU8(0, data), inputInfo0); + auto dequant1 = Dequantize(GetInputTensorDataU8(1, data), inputInfo1); + + std::vector results(outputInfo.GetNumElements()); + + ArithmeticFunction(inputInfo0.GetShape(), + inputInfo1.GetShape(), + outputInfo.GetShape(), + dequant0.data(), + dequant1.data(), + results.data()); + + Quantize(GetOutputTensorDataU8(0, data), results.data(), outputInfo); +} + +} + +template class armnn::BaseFloat32ArithmeticWorkload>; +template class armnn::BaseUint8ArithmeticWorkload>; + +template class armnn::BaseFloat32ArithmeticWorkload>; +template class armnn::BaseUint8ArithmeticWorkload>; + +template class armnn::BaseFloat32ArithmeticWorkload>; +template class armnn::BaseUint8ArithmeticWorkload>; + +template class armnn::BaseFloat32ArithmeticWorkload>; +template class armnn::BaseUint8ArithmeticWorkload>; diff --git a/src/backends/RefWorkloads/RefArithmeticWorkload.hpp b/src/backends/RefWorkloads/RefArithmeticWorkload.hpp new file mode 100644 index 0000000000..7197b7a883 --- /dev/null +++ b/src/backends/RefWorkloads/RefArithmeticWorkload.hpp @@ -0,0 +1,122 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include "backends/StringMapping.hpp" +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +template +class RefArithmeticWorkload +{ + // Needs specialization. The default is empty on purpose. 
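+    // The Float32 and QuantisedAsymm8 specializations below wrap BaseFloat32ArithmeticWorkload
+    // and BaseUint8ArithmeticWorkload respectively; the type aliases at the end of this header
+    // (RefAdditionFloat32Workload and friends) are what the factory instantiates.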
+}; + +template +class BaseFloat32ArithmeticWorkload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + void ExecuteImpl(const char * debugString) const; +}; + +template +class RefArithmeticWorkload + : public BaseFloat32ArithmeticWorkload +{ +public: + using BaseFloat32ArithmeticWorkload::BaseFloat32ArithmeticWorkload; + + virtual void Execute() const override + { + using Parent = BaseFloat32ArithmeticWorkload; + Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString)); + } +}; + +template +class BaseUint8ArithmeticWorkload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + void ExecuteImpl(const char * debugString) const; +}; + +template +class RefArithmeticWorkload + : public BaseUint8ArithmeticWorkload +{ +public: + using BaseUint8ArithmeticWorkload::BaseUint8ArithmeticWorkload; + + virtual void Execute() const override + { + using Parent = BaseUint8ArithmeticWorkload; + Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString)); + } +}; + +using RefAdditionFloat32Workload = + RefArithmeticWorkload, + DataType::Float32, + AdditionQueueDescriptor, + StringMapping::RefAdditionWorkload_Execute>; + +using RefAdditionUint8Workload = + RefArithmeticWorkload, + DataType::QuantisedAsymm8, + AdditionQueueDescriptor, + StringMapping::RefAdditionWorkload_Execute>; + + +using RefSubtractionFloat32Workload = + RefArithmeticWorkload, + DataType::Float32, + SubtractionQueueDescriptor, + StringMapping::RefSubtractionWorkload_Execute>; + +using RefSubtractionUint8Workload = + RefArithmeticWorkload, + DataType::QuantisedAsymm8, + SubtractionQueueDescriptor, + StringMapping::RefSubtractionWorkload_Execute>; + +using RefMultiplicationFloat32Workload = + RefArithmeticWorkload, + DataType::Float32, + MultiplicationQueueDescriptor, + StringMapping::RefMultiplicationWorkload_Execute>; + +using RefMultiplicationUint8Workload = + RefArithmeticWorkload, + DataType::QuantisedAsymm8, + MultiplicationQueueDescriptor, + StringMapping::RefMultiplicationWorkload_Execute>; + +using RefDivisionFloat32Workload = + RefArithmeticWorkload, + DataType::Float32, + DivisionQueueDescriptor, + StringMapping::RefDivisionWorkload_Execute>; + +using RefDivisionUint8Workload = + RefArithmeticWorkload, + DataType::QuantisedAsymm8, + DivisionQueueDescriptor, + StringMapping::RefDivisionWorkload_Execute>; + +} // armnn diff --git a/src/backends/RefWorkloads/RefBaseConstantWorkload.cpp b/src/backends/RefWorkloads/RefBaseConstantWorkload.cpp new file mode 100644 index 0000000000..647677b4fb --- /dev/null +++ b/src/backends/RefWorkloads/RefBaseConstantWorkload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefBaseConstantWorkload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include + +#include + +#include + +namespace armnn +{ + +template +void RefBaseConstantWorkload::Execute() const +{ + // Considering the reference backend independently, it could be possible to initialise the intermediate tensor + // created by the layer output handler at workload construction time, rather than at workload execution time. + // However, this is not an option for other backends (e.g. CL). For consistency, we prefer to align all + // implementations. 
+ // A similar argument can be made about performing the memory copy in the first place (the layer output handler + // could have a non-owning reference to the layer output tensor managed by the const input layer); again, this is + // not an option for other backends, and the extra complexity required to make this work for the reference backend + // may not be worth the effort (skipping a memory copy in the first inference). + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + + const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]); + BOOST_ASSERT(data.m_LayerOutput->GetTensorInfo().GetNumBytes() == outputInfo.GetNumBytes()); + + memcpy(GetOutputTensorData(0, data), data.m_LayerOutput->GetConstTensor(), + outputInfo.GetNumBytes()); + + m_RanOnce = true; + } +} + +template class RefBaseConstantWorkload; +template class RefBaseConstantWorkload; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefBaseConstantWorkload.hpp b/src/backends/RefWorkloads/RefBaseConstantWorkload.hpp new file mode 100644 index 0000000000..8dc9fd6104 --- /dev/null +++ b/src/backends/RefWorkloads/RefBaseConstantWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +#include + +namespace armnn +{ + +// Base class template providing an implementation of the Constant layer common to all data types. +template +class RefBaseConstantWorkload : public TypedWorkload +{ +public: + RefBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload(descriptor, info) + , m_RanOnce(false) + { + } + + virtual void Execute() const override; + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp b/src/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..313af9c438 --- /dev/null +++ b/src/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefBatchNormalizationFloat32Workload.hpp" + +#include "BatchNormImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefBatchNormalizationFloat32Workload::RefBatchNormalizationFloat32Workload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Mean(std::make_unique(*(descriptor.m_Mean))), + m_Variance(std::make_unique(*(descriptor.m_Variance))), + m_Beta(std::make_unique(*(descriptor.m_Beta))), + m_Gamma(std::make_unique(*(descriptor.m_Gamma))) {} + +void RefBatchNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationFloat32Workload_Execute"); + + const float* var = m_Variance->GetConstTensor(); + const float* mean = m_Mean->GetConstTensor(); + const float* gamma = m_Gamma->GetConstTensor(); + const float* beta = m_Beta->GetConstTensor(); + + auto inputData = GetInputTensorDataFloat(0, m_Data); + auto outputData = GetOutputTensorDataFloat(0, m_Data); + + BatchNormImpl(m_Data, var, mean, gamma, beta, outputData, inputData); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp b/src/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..15c843c2ca --- /dev/null +++ b/src/backends/RefWorkloads/RefBatchNormalizationFloat32Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefBatchNormalizationFloat32Workload : public Float32Workload +{ +public: + explicit RefBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Beta; + std::unique_ptr m_Gamma; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp b/src/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp new file mode 100644 index 0000000000..e248ad4b9d --- /dev/null +++ b/src/backends/RefWorkloads/RefBatchNormalizationUint8Workload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefBatchNormalizationUint8Workload.hpp" + +#include "BatchNormImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ +RefBatchNormalizationUint8Workload::RefBatchNormalizationUint8Workload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Mean(std::make_unique(*(descriptor.m_Mean))), + m_Variance(std::make_unique(*(descriptor.m_Variance))), + m_Beta(std::make_unique(*(descriptor.m_Beta))), + m_Gamma(std::make_unique(*(descriptor.m_Gamma))) {} + +void RefBatchNormalizationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationUint8Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& varInfo = GetTensorInfo(m_Variance.get()); + const TensorInfo& meanInfo = GetTensorInfo(m_Mean.get()); + const TensorInfo& gammaInfo = GetTensorInfo(m_Gamma.get()); + const TensorInfo& betaInfo = GetTensorInfo(m_Beta.get()); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto input = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0); + auto var = Dequantize(m_Variance->GetConstTensor(), varInfo); + auto mean = Dequantize(m_Mean->GetConstTensor(), meanInfo); + auto gamma = Dequantize(m_Gamma->GetConstTensor(), gammaInfo); + auto beta = Dequantize(m_Beta->GetConstTensor(), betaInfo); + + std::vector results(outputInfo.GetNumElements()); + BatchNormImpl(m_Data, var.data(), mean.data(), gamma.data(), beta.data(), results.data(), input.data()); + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp b/src/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp new file mode 100644 index 0000000000..d3e8e0a120 --- /dev/null +++ b/src/backends/RefWorkloads/RefBatchNormalizationUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefBatchNormalizationUint8Workload : public Uint8Workload +{ +public: + explicit RefBatchNormalizationUint8Workload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Beta; + std::unique_ptr m_Gamma; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConstantFloat32Workload.cpp b/src/backends/RefWorkloads/RefConstantFloat32Workload.cpp new file mode 100644 index 0000000000..074e8ccaae --- /dev/null +++ b/src/backends/RefWorkloads/RefConstantFloat32Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefConstantFloat32Workload.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConstantFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantFloat32Workload_Execute"); + RefBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConstantFloat32Workload.hpp b/src/backends/RefWorkloads/RefConstantFloat32Workload.hpp new file mode 100644 index 0000000000..76e3a42026 --- /dev/null +++ b/src/backends/RefWorkloads/RefConstantFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefBaseConstantWorkload.hpp" + +namespace armnn +{ + +class RefConstantFloat32Workload : public RefBaseConstantWorkload +{ +public: + using RefBaseConstantWorkload::RefBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConstantUint8Workload.cpp b/src/backends/RefWorkloads/RefConstantUint8Workload.cpp new file mode 100644 index 0000000000..07e4719d54 --- /dev/null +++ b/src/backends/RefWorkloads/RefConstantUint8Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefConstantUint8Workload.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConstantUint8Workload_Execute"); + RefBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConstantUint8Workload.hpp b/src/backends/RefWorkloads/RefConstantUint8Workload.hpp new file mode 100644 index 0000000000..02552ac80b --- /dev/null +++ b/src/backends/RefWorkloads/RefConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefBaseConstantWorkload.hpp" + +namespace armnn +{ + +class RefConstantUint8Workload : public RefBaseConstantWorkload +{ +public: + using RefBaseConstantWorkload::RefBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp b/src/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..e148bf6a9d --- /dev/null +++ b/src/backends/RefWorkloads/RefConvertFp16ToFp32Workload.cpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefConvertFp16ToFp32Workload.hpp" +#include "Half.hpp" +#include "RefWorkloadUtils.hpp" +#include "FloatingPointConverter.hpp" + +namespace armnn +{ + +void RefConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp16ToFp32Workload_Execute"); + + const Half* const input = GetInputTensorDataHalf(0, m_Data); + float* const output = GetOutputTensorDataFloat(0, m_Data); + + unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp b/src/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..5e841ba34f --- /dev/null +++ b/src/backends/RefWorkloads/RefConvertFp16ToFp32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvertFp16ToFp32Workload : public Float16ToFloat32Workload +{ +public: + using Float16ToFloat32Workload::Float16ToFloat32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp b/src/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..efaaf8e1ad --- /dev/null +++ b/src/backends/RefWorkloads/RefConvertFp32ToFp16Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefConvertFp32ToFp16Workload.hpp" + +#include "Half.hpp" +#include "FloatingPointConverter.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToFp16Workload_Execute"); + + const float* const input = GetInputTensorDataFloat(0, m_Data); + Half* const output = GetOutputTensorDataHalf(0, m_Data); + + // convert Fp32 input to Fp16 output + unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp b/src/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..0754fd5c79 --- /dev/null +++ b/src/backends/RefWorkloads/RefConvertFp32ToFp16Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvertFp32ToFp16Workload : public Float32ToFloat16Workload +{ +public: + using Float32ToFloat16Workload::Float32ToFloat16Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp b/src/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp new file mode 100644 index 0000000000..20905646d7 --- /dev/null +++ b/src/backends/RefWorkloads/RefConvolution2dFloat32Workload.cpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefConvolution2dFloat32Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefConvolution2dFloat32Workload::RefConvolution2dFloat32Workload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefConvolution2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dFloat32Workload_Execute"); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + const float* weightData = m_Weight->template GetConstTensor(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? + m_Bias->template GetConstTensor() : nullptr; + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); + + ConvImpl( + m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp b/src/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp new file mode 100644 index 0000000000..34489e807c --- /dev/null +++ b/src/backends/RefWorkloads/RefConvolution2dFloat32Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvolution2dFloat32Workload : public Float32Workload +{ +public: + explicit RefConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; + +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp b/src/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..881e9bf6b0 --- /dev/null +++ b/src/backends/RefWorkloads/RefConvolution2dUint8Workload.cpp @@ -0,0 +1,45 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefConvolution2dUint8Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefConvolution2dUint8Workload::RefConvolution2dUint8Workload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvolution2dUint8Workload_Execute"); + + const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const uint8_t* weightsData = m_Weight->template GetConstTensor(); + const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? 
+ m_Bias->template GetConstTensor() : + nullptr; + uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); + + ConvImpl( + m_Data, + inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), + weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), + biasData, + outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp b/src/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..0e2dd6aada --- /dev/null +++ b/src/backends/RefWorkloads/RefConvolution2dUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefConvolution2dUint8Workload : public Uint8Workload +{ +public: + explicit RefConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; + +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp b/src/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp new file mode 100644 index 0000000000..e89013b9bd --- /dev/null +++ b/src/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.cpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefDepthwiseConvolution2dFloat32Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefDepthwiseConvolution2dFloat32Workload::RefDepthwiseConvolution2dFloat32Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefDepthwiseConvolution2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dFloat32Workload_Execute"); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + const float* weightData = m_Weight->template GetConstTensor(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? + m_Bias->template GetConstTensor() : nullptr; + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); + + ConvImpl + (m_Data, inputData, 0.0f, 0, weightData, 0.0f, 0, biasData, outputData, 0.0f, 0, filterInfo, true); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp b/src/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp new file mode 100644 index 0000000000..8f1227e2de --- /dev/null +++ b/src/backends/RefWorkloads/RefDepthwiseConvolution2dFloat32Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefDepthwiseConvolution2dFloat32Workload : public Float32Workload +{ +public: + explicit RefDepthwiseConvolution2dFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp b/src/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..e8e501d6ae --- /dev/null +++ b/src/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.cpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefDepthwiseConvolution2dUint8Workload.hpp" + +#include "ConvImpl.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +RefDepthwiseConvolution2dUint8Workload::RefDepthwiseConvolution2dUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefDepthwiseConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthwiseConvolution2dUint8Workload_Execute"); + + const uint8_t* inputData = GetInputTensorDataU8(0, m_Data); + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const uint8_t* weightsData = m_Weight->template GetConstTensor(); + const TensorInfo& weightsInfo = GetTensorInfo(m_Weight.get()); + const int32_t* biasData = m_Data.m_Parameters.m_BiasEnabled ? + m_Bias->template GetConstTensor() : + nullptr; + uint8_t* outputData = GetOutputTensorDataU8(0, m_Data); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + const TensorInfo& filterInfo = m_Weight->GetTensorInfo(); + + ConvImpl( + m_Data, + inputData, inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), + weightsData, weightsInfo.GetQuantizationScale(), weightsInfo.GetQuantizationOffset(), + biasData, + outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), filterInfo, true); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp b/src/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..c615cf7880 --- /dev/null +++ b/src/backends/RefWorkloads/RefDepthwiseConvolution2dUint8Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefDepthwiseConvolution2dUint8Workload : public Uint8Workload +{ +public: + explicit RefDepthwiseConvolution2dUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp b/src/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp new file mode 100644 index 0000000000..3e16f60b11 --- /dev/null +++ b/src/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.cpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefFakeQuantizationFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void FakeQuantization(const float* inputData, float* outputData, uint32_t numElements, float min, float max) +{ + float scale = (max - min) / 255.f; + int32_t offset = boost::numeric_cast((-min * 255.f) / (max - min)); + + for (uint32_t i = 0; i < numElements; i++) + { + outputData[i] = static_cast(armnn::Quantize(inputData[i], scale, offset)); + } + +} + +void RefFakeQuantizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFakeQuantizationFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + const float* inputData = GetInputTensorDataFloat(0, m_Data); + float* outputData = GetOutputTensorDataFloat(0, m_Data); + FakeQuantization(inputData, outputData, inputInfo.GetNumElements(), + m_Data.m_Parameters.m_Min, + m_Data.m_Parameters.m_Max); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp b/src/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp new file mode 100644 index 0000000000..523fdcff50 --- /dev/null +++ b/src/backends/RefWorkloads/RefFakeQuantizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFakeQuantizationFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefFloorFloat32Workload.cpp b/src/backends/RefWorkloads/RefFloorFloat32Workload.cpp new file mode 100644 index 0000000000..cc1f8800dc --- /dev/null +++ b/src/backends/RefWorkloads/RefFloorFloat32Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefFloorFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefFloorFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFloorFloat32Workload_Execute"); + + const float* const input = GetInputTensorDataFloat(0, m_Data); + float* const output = GetOutputTensorDataFloat(0, m_Data); + + unsigned int numElements = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements(); + for (unsigned int i = 0; i < numElements; ++i) + { + output[i] = floorf(input[i]); + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefFloorFloat32Workload.hpp b/src/backends/RefWorkloads/RefFloorFloat32Workload.hpp new file mode 100644 index 0000000000..d7cfa50365 --- /dev/null +++ b/src/backends/RefWorkloads/RefFloorFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFloorFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp b/src/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp new file mode 100644 index 0000000000..ccaf4cd87b --- /dev/null +++ b/src/backends/RefWorkloads/RefFullyConnectedFloat32Workload.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefFullyConnectedFloat32Workload.hpp" + +#include "FullyConnected.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ +RefFullyConnectedFloat32Workload::RefFullyConnectedFloat32Workload( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) + : Float32Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefFullyConnectedFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + const float* weightData = m_Weight->GetConstTensor(); + const float* biasData = m_Data.m_Parameters.m_BiasEnabled ? m_Bias->GetConstTensor() : nullptr; + + FullyConnected(inputData, + outputData, + inputInfo, + outputInfo, + weightData, + biasData, + m_Data.m_Parameters.m_TransposeWeightMatrix); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp b/src/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp new file mode 100644 index 0000000000..ce058690ac --- /dev/null +++ b/src/backends/RefWorkloads/RefFullyConnectedFloat32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFullyConnectedFloat32Workload : public Float32Workload +{ +public: + explicit RefFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp b/src/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp new file mode 100644 index 0000000000..cd785d786c --- /dev/null +++ b/src/backends/RefWorkloads/RefFullyConnectedUint8Workload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefFullyConnectedUint8Workload.hpp" + +#include "FullyConnected.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ +RefFullyConnectedUint8Workload::RefFullyConnectedUint8Workload( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info), + m_Weight(std::make_unique(*(descriptor.m_Weight))), + m_Bias(descriptor.m_Parameters.m_BiasEnabled + ? std::make_unique(*(descriptor.m_Bias)) : nullptr) {} + +void RefFullyConnectedUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefFullyConnectedUint8Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + const uint8_t* weightData = m_Weight->GetConstTensor(); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); + + auto weight = Dequantize(weightData, m_Weight->GetTensorInfo()); + + std::vector results(outputInfo.GetNumElements()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + const int32_t* biasData = m_Bias->GetConstTensor(); + auto bias = Dequantize(biasData, m_Bias->GetTensorInfo()); + + FullyConnected(dequant.data(), + results.data(), + inputInfo, + outputInfo, + weight.data(), + bias.data(), + m_Data.m_Parameters.m_TransposeWeightMatrix); + } + else + { + FullyConnected(dequant.data(), + results.data(), + inputInfo, + outputInfo, + weight.data(), + nullptr, + m_Data.m_Parameters.m_TransposeWeightMatrix); + } + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp b/src/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp new file mode 100644 index 0000000000..e489cc7d81 --- /dev/null +++ b/src/backends/RefWorkloads/RefFullyConnectedUint8Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefFullyConnectedUint8Workload : public Uint8Workload +{ +public: + explicit RefFullyConnectedUint8Workload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + std::unique_ptr m_Weight; + std::unique_ptr m_Bias; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp b/src/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..973c87b009 --- /dev/null +++ b/src/backends/RefWorkloads/RefL2NormalizationFloat32Workload.cpp @@ -0,0 +1,61 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefL2NormalizationFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "TensorBufferArrayView.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefL2NormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefL2NormalizationFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + TensorBufferArrayView input(inputInfo.GetShape(), GetInputTensorDataFloat(0, m_Data)); + TensorBufferArrayView output(outputInfo.GetShape(), GetOutputTensorDataFloat(0, m_Data)); + + const unsigned int batchSize = inputInfo.GetShape()[0]; + const unsigned int depth = inputInfo.GetShape()[1]; + const unsigned int rows = inputInfo.GetShape()[2]; + const unsigned int cols = inputInfo.GetShape()[3]; + + for (unsigned int n = 0; n < batchSize; ++n) + { + for (unsigned int d = 0; d < depth; ++d) + { + for (unsigned int h = 0; h < rows; ++h) + { + for (unsigned int w = 0; w < cols; ++w) + { + float reduction = 0.0; + for (unsigned int c = 0; c < depth; ++c) + { + const float value = input.Get(n, c, h, w); + reduction += value * value; + } + + // Using std::max(reduction, epsilon) below would prevent against division by 0. + // However, at the time of writing: + // - This is not supported by the ACL functions used to implement L2Normalization in the CL + // backend. + // - The reference semantics for this operator do not include this parameter. + const float scale = 1.0f / sqrtf(reduction); + output.Get(n, d, h, w) = input.Get(n, d, h, w) * scale; + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp b/src/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..a3f03f3060 --- /dev/null +++ b/src/backends/RefWorkloads/RefL2NormalizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefL2NormalizationFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefLstmFloat32Workload.cpp b/src/backends/RefWorkloads/RefLstmFloat32Workload.cpp new file mode 100644 index 0000000000..50ff605701 --- /dev/null +++ b/src/backends/RefWorkloads/RefLstmFloat32Workload.cpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefLstmFloat32Workload.hpp" + +namespace armnn +{ + +void RefLstmFloat32Workload::Execute() const +{ + throw armnn::Exception("No implementation of Lstm in the Ref backend!"); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefLstmFloat32Workload.hpp b/src/backends/RefWorkloads/RefLstmFloat32Workload.hpp new file mode 100644 index 0000000000..fc4f7776c6 --- /dev/null +++ b/src/backends/RefWorkloads/RefLstmFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefLstmFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefMergerFloat32Workload.cpp b/src/backends/RefWorkloads/RefMergerFloat32Workload.cpp new file mode 100644 index 0000000000..b1f8a32ee7 --- /dev/null +++ b/src/backends/RefWorkloads/RefMergerFloat32Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefMergerFloat32Workload.hpp" + +#include "Merger.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefMergerFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerFloat32Workload_Execute"); + Merger(m_Data); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefMergerFloat32Workload.hpp b/src/backends/RefWorkloads/RefMergerFloat32Workload.hpp new file mode 100644 index 0000000000..23a523c852 --- /dev/null +++ b/src/backends/RefWorkloads/RefMergerFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefMergerFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefMergerUint8Workload.cpp b/src/backends/RefWorkloads/RefMergerUint8Workload.cpp new file mode 100644 index 0000000000..47ce1cf731 --- /dev/null +++ b/src/backends/RefWorkloads/RefMergerUint8Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefMergerUint8Workload.hpp" + +#include "Merger.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefMergerUint8Workload_Execute"); + Merger(m_Data); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefMergerUint8Workload.hpp b/src/backends/RefWorkloads/RefMergerUint8Workload.hpp new file mode 100644 index 0000000000..65dc42120a --- /dev/null +++ b/src/backends/RefWorkloads/RefMergerUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefMergerUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp b/src/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp new file mode 100644 index 0000000000..5c24416624 --- /dev/null +++ b/src/backends/RefWorkloads/RefNormalizationFloat32Workload.cpp @@ -0,0 +1,185 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefNormalizationFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +#include +#include + +namespace armnn +{ + +// Helper function to compute "Within" normalization using Krichevsky 2012: Local Brightness Normalization. +static void NormalizeWithinUingLbr(const float* inputData, + float* outputData, + const TensorShape& tensorShape, + uint32_t norm_size, + float alpha, + float beta, + float kappa) +{ + const unsigned int batchSize = tensorShape[0]; + const unsigned int depth = tensorShape[1]; + const unsigned int rows = tensorShape[2]; + const unsigned int cols = tensorShape[3]; + + int radius = boost::numeric_cast(norm_size / 2u); /* Strong Assumption on rounding Mode */ + + for (unsigned int n = 0; n < batchSize; n++) + { + for (unsigned int c = 0; c < depth; c++) + { + for (unsigned int h = 0; h < rows; h++) + { + for (unsigned int w = 0; w < cols; w++) + { + float accumulated_scale = 0.0; + for (int y = -radius; y <= radius; y++) + { + for (int x = -radius; x <= radius; x++) + { + int i = boost::numeric_cast(w) + x; + int j = boost::numeric_cast(h) + y; + + if ((i < 0) || (i >= boost::numeric_cast(cols))) + { + continue; + } + + if ((j < 0) || (j >= boost::numeric_cast(rows))) + { + continue; + } + + float inval = inputData[n * cols * rows * depth + + c * cols * rows + + boost::numeric_cast(j) * cols + + boost::numeric_cast(i)]; + + accumulated_scale += inval*inval; + } + } + outputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w] = inputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w] / (powf((kappa + (accumulated_scale * alpha)), beta)); + } + } + } + } +} + +// Helper function to compute "Across" normalization using Krichevsky 2012: Local Brightness Normalization. 
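+// For each element the accumulated scale is the sum of squares over the normSize neighbouring channels at the
+// same spatial position, and the result is input[n,c,h,w] * (kappa + alpha * accumulatedScale)^(-beta).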
+void NormalizeAcrossUingLbr(const float* inputData, + float* outputData, + const TensorShape& tensorShape, + uint32_t norm_size, + float alpha, + float beta, + float kappa) +{ + const unsigned int batchSize = tensorShape[0]; + const unsigned int depth = tensorShape[1]; + const unsigned int rows = tensorShape[2]; + const unsigned int cols = tensorShape[3]; + + int radius = boost::numeric_cast(norm_size / 2u); /* Strong Assumption on rounding Mode */ + + for (unsigned int n = 0; n < batchSize; n++) + { + for (unsigned int c = 0; c < depth; c++) + { + for (unsigned int h = 0; h < rows; h++) + { + for (unsigned int w = 0; w < cols; w++) + { + float accumulated_scale = 0.0; + for (int z = -radius; z <= radius; z++) + { + int k = boost::numeric_cast(c) + z; + + if ((k < 0) || (k >= boost::numeric_cast(depth))) + { + continue; + } + + float inval = inputData[n * cols * rows * depth + + boost::numeric_cast(k) * cols * rows + + h * cols + + w]; + + accumulated_scale += inval*inval; + } + float scale = kappa + (accumulated_scale * alpha); + scale = powf(scale, -beta); + outputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w] = scale * + inputData[n * cols * rows * depth + + c * cols * rows + + h * cols + + w]; + } + } + } + } +} + +void RefNormalizationFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + + + if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType) + { + if (NormalizationAlgorithmChannel::Within == m_Data.m_Parameters.m_NormChannelType) + { + NormalizeWithinUingLbr(inputData, + outputData, + inputInfo.GetShape(), + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K); + } + else if (NormalizationAlgorithmChannel::Across == m_Data.m_Parameters.m_NormChannelType) + { + NormalizeAcrossUingLbr(inputData, + outputData, + inputInfo.GetShape(), + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K); + } + else + { + BOOST_LOG_TRIVIAL(warning) << "Illegal NORMALIZATION mode in normalization_f32"; + return; + } + } + else + { + BOOST_LOG_TRIVIAL(warning) << "Lcr method (Jarret 2009: Local Contrast Normalization) not supported yet."; + return; + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp b/src/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp new file mode 100644 index 0000000000..e30356c422 --- /dev/null +++ b/src/backends/RefWorkloads/RefNormalizationFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefNormalizationFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefPermuteWorkload.cpp b/src/backends/RefWorkloads/RefPermuteWorkload.cpp new file mode 100644 index 0000000000..4093ff38f4 --- /dev/null +++ b/src/backends/RefWorkloads/RefPermuteWorkload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "RefPermuteWorkload.hpp" +#include "RefWorkloadUtils.hpp" + +#include +#include "TypeUtils.hpp" + +namespace armnn +{ + +template +void RefPermuteWorkload::Execute() const +{ + using T = ResolveType; + + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); + + const ITensorHandle* src = m_Data.m_Inputs[0]; + const ITensorHandle* dst = m_Data.m_Outputs[0]; + const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, GetConstCpuData(src), GetCpuData(dst)); +} + +template class RefPermuteWorkload; +template class RefPermuteWorkload; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefPermuteWorkload.hpp b/src/backends/RefWorkloads/RefPermuteWorkload.hpp new file mode 100644 index 0000000000..d72cf77e74 --- /dev/null +++ b/src/backends/RefWorkloads/RefPermuteWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +template +class RefPermuteWorkload : public TypedWorkload +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("RefPermute") + GetDataTypeName(DataType) + "Workload"; + return name; + } + + using TypedWorkload::m_Data; + using TypedWorkload::TypedWorkload; + void Execute() const override; +}; + +using RefPermuteFloat32Workload = RefPermuteWorkload; +using RefPermuteUint8Workload = RefPermuteWorkload; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp b/src/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp new file mode 100644 index 0000000000..2542756c26 --- /dev/null +++ b/src/backends/RefWorkloads/RefPooling2dFloat32Workload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefPooling2dFloat32Workload.hpp" + +#include "Pooling2d.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefPooling2dFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dFloat32Workload_Execute"); + + const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo0 = GetTensorInfo(m_Data.m_Outputs[0]); + + float* outputData = GetOutputTensorDataFloat(0, m_Data); + const float* inputData = GetInputTensorDataFloat(0, m_Data); + + Pooling2d(inputData, + outputData, + inputInfo0, + outputInfo0, + m_Data.m_Parameters); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp b/src/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp new file mode 100644 index 0000000000..501fb71aff --- /dev/null +++ b/src/backends/RefWorkloads/RefPooling2dFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefPooling2dFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefPooling2dUint8Workload.cpp b/src/backends/RefWorkloads/RefPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..91fdf291ee --- /dev/null +++ b/src/backends/RefWorkloads/RefPooling2dUint8Workload.cpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefPooling2dUint8Workload.hpp" + +#include "Pooling2d.hpp" +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefPooling2dUint8Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); + + std::vector results(outputInfo.GetNumElements()); + Pooling2d(dequant.data(), + results.data(), + inputInfo, + outputInfo, + m_Data.m_Parameters); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefPooling2dUint8Workload.hpp b/src/backends/RefWorkloads/RefPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..6544f9a785 --- /dev/null +++ b/src/backends/RefWorkloads/RefPooling2dUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefPooling2dUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefReshapeFloat32Workload.cpp b/src/backends/RefWorkloads/RefReshapeFloat32Workload.cpp new file mode 100644 index 0000000000..99c94a49a1 --- /dev/null +++ b/src/backends/RefWorkloads/RefReshapeFloat32Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefReshapeFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefReshapeFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeFloat32Workload_Execute"); + + void* output = GetOutputTensorData(0, m_Data); + const void* input = GetInputTensorData(0, m_Data); + unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); + memcpy(output, input, numBytes); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefReshapeFloat32Workload.hpp b/src/backends/RefWorkloads/RefReshapeFloat32Workload.hpp new file mode 100644 index 0000000000..9281e89cf7 --- /dev/null +++ b/src/backends/RefWorkloads/RefReshapeFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefReshapeFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefReshapeUint8Workload.cpp b/src/backends/RefWorkloads/RefReshapeUint8Workload.cpp new file mode 100644 index 0000000000..8f475f3db3 --- /dev/null +++ b/src/backends/RefWorkloads/RefReshapeUint8Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefReshapeUint8Workload.hpp" + +#include "RefWorkloadUtils.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefReshapeUint8Workload_Execute"); + + void* output = GetOutputTensorData(0, m_Data); + const void* input = GetInputTensorData(0, m_Data); + unsigned int numBytes = GetTensorInfo(m_Data.m_Inputs[0]).GetNumBytes(); + memcpy(output, input, numBytes); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefReshapeUint8Workload.hpp b/src/backends/RefWorkloads/RefReshapeUint8Workload.hpp new file mode 100644 index 0000000000..b37fb4bdeb --- /dev/null +++ b/src/backends/RefWorkloads/RefReshapeUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefReshapeUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp b/src/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp new file mode 100644 index 0000000000..50ee7a218a --- /dev/null +++ b/src/backends/RefWorkloads/RefResizeBilinearFloat32Workload.cpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefResizeBilinearFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "ResizeBilinear.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefResizeBilinearFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearFloat32Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + ResizeBilinear(GetInputTensorDataFloat(0, m_Data), + inputInfo, + GetOutputTensorDataFloat(0, m_Data), + outputInfo); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp b/src/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp new file mode 100644 index 0000000000..0fff7ee695 --- /dev/null +++ b/src/backends/RefWorkloads/RefResizeBilinearFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefResizeBilinearFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp b/src/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp new file mode 100644 index 0000000000..67ab039ef3 --- /dev/null +++ b/src/backends/RefWorkloads/RefResizeBilinearUint8Workload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefResizeBilinearUint8Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "ResizeBilinear.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefResizeBilinearUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefResizeBilinearUint8Workload_Execute"); + + const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]); + const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo); + + std::vector results(outputInfo.GetNumElements()); + ResizeBilinear(dequant.data(), inputInfo, results.data(), outputInfo); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp b/src/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp new file mode 100644 index 0000000000..bbaf899ca6 --- /dev/null +++ b/src/backends/RefWorkloads/RefResizeBilinearUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefResizeBilinearUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp b/src/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp new file mode 100644 index 0000000000..1f519bda10 --- /dev/null +++ b/src/backends/RefWorkloads/RefSoftmaxFloat32Workload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefSoftmaxFloat32Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "Softmax.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefSoftmaxFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxFloat32Workload_Execute"); + + Softmax(GetInputTensorDataFloat(0, m_Data), + GetOutputTensorDataFloat(0, m_Data), + GetTensorInfo(m_Data.m_Inputs[0]), + m_Data.m_Parameters.m_Beta); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp b/src/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp new file mode 100644 index 0000000000..d37f2b5990 --- /dev/null +++ b/src/backends/RefWorkloads/RefSoftmaxFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSoftmaxFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp b/src/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..17114ec83a --- /dev/null +++ b/src/backends/RefWorkloads/RefSoftmaxUint8Workload.cpp @@ -0,0 +1,36 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefSoftmaxUint8Workload.hpp" + +#include "RefWorkloadUtils.hpp" +#include "Softmax.hpp" + +#include "Profiling.hpp" + +#include + +namespace armnn +{ + +void RefSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSoftmaxUint8Workload_Execute"); + + const TensorInfo& tensorInfo = GetTensorInfo(m_Data.m_Inputs[0]); + + auto dequant = Dequantize(GetInputTensorDataU8(0, m_Data), tensorInfo); + + std::vector results(tensorInfo.GetNumElements()); + + Softmax(dequant.data(), + results.data(), + tensorInfo, + m_Data.m_Parameters.m_Beta); + + Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), GetTensorInfo(m_Data.m_Outputs[0])); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp b/src/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..b179d529da --- /dev/null +++ b/src/backends/RefWorkloads/RefSoftmaxUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSoftmaxUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefSplitterFloat32Workload.cpp b/src/backends/RefWorkloads/RefSplitterFloat32Workload.cpp new file mode 100644 index 0000000000..75611dacf3 --- /dev/null +++ b/src/backends/RefWorkloads/RefSplitterFloat32Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefSplitterFloat32Workload.hpp" + +#include "Splitter.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefSplitterFloat32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterFloat32Workload_Execute"); + Splitter(m_Data); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefSplitterFloat32Workload.hpp b/src/backends/RefWorkloads/RefSplitterFloat32Workload.hpp new file mode 100644 index 0000000000..12176dd277 --- /dev/null +++ b/src/backends/RefWorkloads/RefSplitterFloat32Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSplitterFloat32Workload : public Float32Workload +{ +public: + using Float32Workload::Float32Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefSplitterUint8Workload.cpp b/src/backends/RefWorkloads/RefSplitterUint8Workload.cpp new file mode 100644 index 0000000000..ca9f5db850 --- /dev/null +++ b/src/backends/RefWorkloads/RefSplitterUint8Workload.cpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "RefSplitterUint8Workload.hpp" + +#include "Splitter.hpp" + +#include "Profiling.hpp" + +namespace armnn +{ + +void RefSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSplitterUint8Workload_Execute"); + Splitter(m_Data); +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefSplitterUint8Workload.hpp b/src/backends/RefWorkloads/RefSplitterUint8Workload.hpp new file mode 100644 index 0000000000..e80cb1a654 --- /dev/null +++ b/src/backends/RefWorkloads/RefSplitterUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +namespace armnn +{ + +class RefSplitterUint8Workload : public Uint8Workload +{ +public: + using Uint8Workload::Uint8Workload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/RefWorkloadUtils.hpp b/src/backends/RefWorkloads/RefWorkloadUtils.hpp new file mode 100644 index 0000000000..616a875028 --- /dev/null +++ b/src/backends/RefWorkloads/RefWorkloadUtils.hpp @@ -0,0 +1,138 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/CpuTensorHandle.hpp" + +#include +#include +#include + +#include + +namespace armnn +{ + +//////////////////////////////////////////// +/// float32 helpers +//////////////////////////////////////////// + +inline const TensorInfo& GetTensorInfo(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. + const ConstCpuTensorHandle* cpuTensorHandle = + boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetTensorInfo(); +} + +template +inline const DataType* GetConstCpuData(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate. + const ConstCpuTensorHandle* cpuTensorHandle = + boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetConstTensor(); +} + +template +inline DataType* GetCpuData(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. 
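+    // boost::polymorphic_downcast compiles to a plain static_cast in release builds and asserts the dynamic
+    // type (via dynamic_cast) in debug builds, so an unexpected tensor handle type is caught early.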
+ const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetTensor(); +}; + +template +const DataType* GetInputTensorData(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Inputs[idx]; + return GetConstCpuData(tensorHandle); +} + +template +DataType* GetOutputTensorData(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Outputs[idx]; + return GetCpuData(tensorHandle); +} + +template +const float* GetInputTensorDataFloat(unsigned int idx, const PayloadType& data) +{ + return GetInputTensorData(idx, data); +} + +template +float* GetOutputTensorDataFloat(unsigned int idx, const PayloadType& data) +{ + return GetOutputTensorData(idx, data); +} + +template +const Half* GetInputTensorDataHalf(unsigned int idx, const PayloadType& data) +{ + return GetInputTensorData(idx, data); +} + +template +Half* GetOutputTensorDataHalf(unsigned int idx, const PayloadType& data) +{ + return GetOutputTensorData(idx, data); +} + +//////////////////////////////////////////// +/// u8 helpers +//////////////////////////////////////////// + +inline const uint8_t* GetConstCpuU8Data(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use (Const)CpuTensorHandles only, so this cast is legitimate. + const ConstCpuTensorHandle* cpuTensorHandle = + boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetConstTensor(); +}; + +inline uint8_t* GetCpuU8Data(const ITensorHandle* tensorHandle) +{ + // We know that reference workloads use CpuTensorHandles only, so this cast is legitimate. + const CpuTensorHandle* cpuTensorHandle = boost::polymorphic_downcast(tensorHandle); + return cpuTensorHandle->GetTensor(); +}; + +template +const uint8_t* GetInputTensorDataU8(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Inputs[idx]; + return GetConstCpuU8Data(tensorHandle); +} + +template +uint8_t* GetOutputTensorDataU8(unsigned int idx, const PayloadType& data) +{ + const ITensorHandle* tensorHandle = data.m_Outputs[idx]; + return GetCpuU8Data(tensorHandle); +} + +template +std::vector Dequantize(const T* quant, const TensorInfo& info) +{ + std::vector ret(info.GetNumElements()); + for (size_t i = 0; i < info.GetNumElements(); i++) + { + ret[i] = armnn::Dequantize(quant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); + } + return ret; +} + +inline void Quantize(uint8_t* quant, const float* dequant, const TensorInfo& info) +{ + for (size_t i = 0; i < info.GetNumElements(); i++) + { + quant[i] = armnn::Quantize(dequant[i], info.GetQuantizationScale(), info.GetQuantizationOffset()); + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/ResizeBilinear.cpp b/src/backends/RefWorkloads/ResizeBilinear.cpp new file mode 100644 index 0000000000..0bce3c7ed8 --- /dev/null +++ b/src/backends/RefWorkloads/ResizeBilinear.cpp @@ -0,0 +1,92 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ResizeBilinear.hpp" + +#include "TensorBufferArrayView.hpp" + +#include + +#include +#include + +namespace armnn +{ + +namespace +{ + +inline float Lerp(float a, float b, float w) +{ + return w * b + (1.f - w) * a; +} + +} + +void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo) +{ + // We follow the definition of TensorFlow and AndroidNN: the top-left corner of a texel in the output + // image is projected into the input image to figure out the interpolants and weights. Note that this + // will yield different results than if projecting the centre of output texels. + + const unsigned int batchSize = inputInfo.GetShape()[0]; + const unsigned int channelCount = inputInfo.GetShape()[1]; + + const unsigned int inputHeight = inputInfo.GetShape()[2]; + const unsigned int inputWidth = inputInfo.GetShape()[3]; + const unsigned int outputHeight = outputInfo.GetShape()[2]; + const unsigned int outputWidth = outputInfo.GetShape()[3]; + + // How much to scale pixel coordinates in the output image, to get the corresponding pixel coordinates + // in the input image. + const float scaleY = boost::numeric_cast(inputHeight) / boost::numeric_cast(outputHeight); + const float scaleX = boost::numeric_cast(inputWidth) / boost::numeric_cast(outputWidth); + + TensorBufferArrayView input(inputInfo.GetShape(), in); + TensorBufferArrayView output(outputInfo.GetShape(), out); + + for (unsigned int n = 0; n < batchSize; ++n) + { + for (unsigned int c = 0; c < channelCount; ++c) + { + for (unsigned int y = 0; y < outputHeight; ++y) + { + // Corresponding real-valued height coordinate in input image. + const float iy = boost::numeric_cast(y) * scaleY; + + // Discrete height coordinate of top-left texel (in the 2x2 texel area used for interpolation). + const float fiy = floorf(iy); + const unsigned int y0 = boost::numeric_cast(fiy); + + // Interpolation weight (range [0,1]). + const float yw = iy - fiy; + + for (unsigned int x = 0; x < outputWidth; ++x) + { + // Real-valued and discrete width coordinates in input image. + const float ix = boost::numeric_cast(x) * scaleX; + const float fix = floorf(ix); + const unsigned int x0 = boost::numeric_cast(fix); + + // Interpolation weight (range [0,1]). + const float xw = ix - fix; + + // Discrete width/height coordinates of texels below and to the right of (x0, y0). + const unsigned int x1 = std::min(x0 + 1, inputWidth - 1u); + const unsigned int y1 = std::min(y0 + 1, inputHeight - 1u); + + // Interpolation + const float ly0 = Lerp(input.Get(n, c, y0, x0), input.Get(n, c, y0, x1), xw); // lerp along row y0. + const float ly1 = Lerp(input.Get(n, c, y1, x0), input.Get(n, c, y1, x1), xw); // lerp along row y1. + const float l = Lerp(ly0, ly1, yw); + + output.Get(n, c, y, x) = l; + } + } + } + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/ResizeBilinear.hpp b/src/backends/RefWorkloads/ResizeBilinear.hpp new file mode 100644 index 0000000000..847b8e8bef --- /dev/null +++ b/src/backends/RefWorkloads/ResizeBilinear.hpp @@ -0,0 +1,15 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +void ResizeBilinear(const float* in, const TensorInfo& inputInfo, float* out, const TensorInfo& outputInfo); + +} //namespace armnn diff --git a/src/backends/RefWorkloads/Softmax.cpp b/src/backends/RefWorkloads/Softmax.cpp new file mode 100644 index 0000000000..4f1016e86c --- /dev/null +++ b/src/backends/RefWorkloads/Softmax.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Softmax.hpp" + +#include +#include + +namespace armnn +{ + +/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. +void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta) +{ + unsigned int numChannels = tensorInfo.GetShape()[1]; + for (unsigned int n = 0; n < tensorInfo.GetShape()[0]; n++) + { + // Find maximum channel. + float max = in[n * numChannels]; + for (unsigned int c = 1; c < numChannels; c++) + { + float val = in[n * numChannels + c]; + if (val > max) + { + max = val; + } + } + + // Exponentiate all values and sum. + std::vector exponentials(numChannels); + float sum = 0.0f; + for (unsigned int c = 0; c < numChannels; c++) + { + float val = in[n * numChannels + c]; + exponentials[c] = expf((val - max) * beta); + sum += exponentials[c]; + } + + // Divide exponentials by sum to give outputs. + for (unsigned int c = 0; c < numChannels; c++) + { + out[n * numChannels + c] = exponentials[c] / sum; + } + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/Softmax.hpp b/src/backends/RefWorkloads/Softmax.hpp new file mode 100644 index 0000000000..3b974f9e9e --- /dev/null +++ b/src/backends/RefWorkloads/Softmax.hpp @@ -0,0 +1,16 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +/// Computes the softmax function on some inputs, into outputs, with a shape given by tensorInfo. +void Softmax(const float* in, float* out, const TensorInfo& tensorInfo, float beta); + +} //namespace armnn diff --git a/src/backends/RefWorkloads/Splitter.hpp b/src/backends/RefWorkloads/Splitter.hpp new file mode 100644 index 0000000000..e9c0379c9e --- /dev/null +++ b/src/backends/RefWorkloads/Splitter.hpp @@ -0,0 +1,84 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "RefWorkloadUtils.hpp" + +#include "backends/WorkloadData.hpp" + +#include + +#include + +namespace armnn +{ + +template +void Splitter(const SplitterQueueDescriptor& data) +{ + const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]); + + for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index) + { + unsigned int indices[MaxNumOfTensorDimensions] = { 0 }; + + unsigned int indexRemainder = index; + unsigned int dimensionStride = inputInfo0.GetNumElements(); + + for (unsigned int i = 0; i= view.m_Origin[i] + outputInfo.GetShape()[i]) + { + insideView = false; + } + } + + if (insideView) + { + unsigned int outIndex = 0; + unsigned int dimensionStride = 1; + + for (unsigned int i = outputInfo.GetNumDimensions(); i-- > 0;) + { + outIndex += dimensionStride * (indices[i] - view.m_Origin[i]); + dimensionStride *= outputInfo.GetShape()[i]; + } + + //We are within the view, to copy input data to the output corresponding to this view. 
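[Editor's note] The Softmax kernel earlier in this hunk subtracts the per-row maximum before exponentiating, which keeps expf from overflowing for large inputs, and folds the beta scaling into the exponent. A standalone single-row sketch of the same computation (the batch loop and TensorInfo plumbing are left out):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// One row of softmax with a scaling factor 'beta'; subtracting the maximum is a standard
// numerical-stability trick and does not change the result.
std::vector<float> SoftmaxRow(const std::vector<float>& in, float beta)
{
    assert(!in.empty());
    const float maxVal = *std::max_element(in.begin(), in.end());

    std::vector<float> out(in.size());
    float sum = 0.0f;
    for (std::size_t c = 0; c < in.size(); ++c)
    {
        out[c] = std::exp((in[c] - maxVal) * beta);
        sum += out[c];
    }
    for (float& v : out)
    {
        v /= sum;
    }
    return out;
}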
+ DataType* outputData = GetOutputTensorData(viewIdx, data); + BOOST_ASSERT(outputData); + + const DataType* inputData = GetInputTensorData(0, data); + BOOST_ASSERT(inputData); + + outputData[outIndex] = inputData[index]; + } + } + } +} + +} //namespace armnn diff --git a/src/backends/RefWorkloads/TensorBufferArrayView.hpp b/src/backends/RefWorkloads/TensorBufferArrayView.hpp new file mode 100644 index 0000000000..e19810ca87 --- /dev/null +++ b/src/backends/RefWorkloads/TensorBufferArrayView.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include + +namespace armnn +{ + +// Utility class providing access to raw tensor memory based on indices along each dimension. +template +class TensorBufferArrayView +{ +public: + TensorBufferArrayView(const TensorShape& shape, DataType* data) + : m_Shape(shape) + , m_Data(data) + { + } + + DataType& Get(unsigned int b, unsigned int c, unsigned int h, unsigned int w) const + { + BOOST_ASSERT( b < m_Shape[0] || (m_Shape[0] == 0 && b == 0) ); + BOOST_ASSERT( c < m_Shape[1] || (m_Shape[1] == 0 && c == 0) ); + BOOST_ASSERT( h < m_Shape[2] || (m_Shape[2] == 0 && h == 0) ); + BOOST_ASSERT( w < m_Shape[3] || (m_Shape[3] == 0 && w == 0) ); + + return m_Data[b * m_Shape[1] * m_Shape[2] * m_Shape[3] + + c * m_Shape[2] * m_Shape[3] + + h * m_Shape[3] + + w]; + } + +private: + const TensorShape m_Shape; + DataType* m_Data; +}; + +} //namespace armnn diff --git a/src/backends/RefWorkloads/backend.cmake b/src/backends/RefWorkloads/backend.cmake new file mode 100644 index 0000000000..6fd09eba6c --- /dev/null +++ b/src/backends/RefWorkloads/backend.cmake @@ -0,0 +1,7 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/RefWorkloads) +list(APPEND armnnLibraries armnnRefBackend) diff --git a/src/backends/StringMapping.cpp b/src/backends/StringMapping.cpp new file mode 100644 index 0000000000..3ca8843812 --- /dev/null +++ b/src/backends/StringMapping.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "StringMapping.hpp" + +namespace armnn +{ + +const StringMapping& StringMapping::Instance() +{ + static StringMapping instance; + return instance; +} + +} // armnn diff --git a/src/backends/StringMapping.hpp b/src/backends/StringMapping.hpp new file mode 100644 index 0000000000..6312e68945 --- /dev/null +++ b/src/backends/StringMapping.hpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +namespace armnn +{ + +/// +/// StringMapping is helper class to be able to use strings as template +/// parameters, so this allows simplifying code which only differs in +/// a string, such as a debug string literal. 
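[Editor's note] TensorBufferArrayView above resolves (batch, channel, height, width) indices against a flat row-major NCHW buffer. Its offset arithmetic can be written on its own as below (a sketch using plain std types, with the shape assumed to be the four extents N, C, H, W):

#include <array>
#include <cassert>
#include <cstddef>

// Row-major NCHW offset: b*C*H*W + c*H*W + h*W + w.
inline std::size_t NchwOffset(const std::array<unsigned, 4>& shape,
                              unsigned b, unsigned c, unsigned h, unsigned w)
{
    const unsigned C = shape[1];
    const unsigned H = shape[2];
    const unsigned W = shape[3];
    assert(b < shape[0] && c < C && h < H && w < W);
    return ((static_cast<std::size_t>(b) * C + c) * H + h) * W + w;
}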
+/// +struct StringMapping +{ +public: + enum Id { + RefAdditionWorkload_Execute, + RefSubtractionWorkload_Execute, + RefMultiplicationWorkload_Execute, + RefDivisionWorkload_Execute, + MAX_STRING_ID + }; + + const char * Get(Id id) const + { + return m_Strings[id]; + } + + static const StringMapping& Instance(); + +private: + StringMapping() + { + m_Strings[RefAdditionWorkload_Execute] = "RefAdditionWorkload_Execute"; + m_Strings[RefSubtractionWorkload_Execute] = "RefSubtractionWorkload_Execute"; + m_Strings[RefMultiplicationWorkload_Execute] = "RefMultiplicationWorkload_Execute"; + m_Strings[RefDivisionWorkload_Execute] = "RefDivisionWorkload_Execute"; + } + + StringMapping(const StringMapping &) = delete; + StringMapping& operator=(const StringMapping &) = delete; + + const char * m_Strings[MAX_STRING_ID]; +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/Workload.hpp b/src/backends/Workload.hpp new file mode 100644 index 0000000000..cf9c6f21e5 --- /dev/null +++ b/src/backends/Workload.hpp @@ -0,0 +1,147 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "WorkloadData.hpp" +#include "WorkloadInfo.hpp" +#include +#include "Profiling.hpp" + +namespace armnn +{ + +// Workload interface to enqueue a layer computation. +class IWorkload +{ +public: + virtual ~IWorkload() {} + + virtual void Execute() const = 0; +}; + +// NullWorkload used to denote an unsupported workload when used by the MakeWorkload<> template +// in the various workload factories. +// There should never be an instantiation of a NullWorkload. +class NullWorkload : public IWorkload +{ + NullWorkload()=delete; +}; + +template +class BaseWorkload : public IWorkload +{ +public: + + BaseWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info) + : m_Data(descriptor) + { + m_Data.Validate(info); + } + + const QueueDescriptor& GetData() const { return m_Data; } + +protected: + const QueueDescriptor m_Data; +}; + +// TypedWorkload used +template +class TypedWorkload : public BaseWorkload +{ +public: + + TypedWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info) + : BaseWorkload(descriptor, info) + { + std::vector dataTypes = {DataTypes...}; + armnn::DataType expectedInputType; + + if (!info.m_InputTensorInfos.empty()) + { + expectedInputType = info.m_InputTensorInfos.front().GetDataType(); + + if (std::find(dataTypes.begin(), dataTypes.end(), expectedInputType) == dataTypes.end()) + { + BOOST_ASSERT_MSG(false, "Trying to create workload with incorrect type"); + } + BOOST_ASSERT_MSG(std::all_of(std::next(info.m_InputTensorInfos.begin()), + info.m_InputTensorInfos.end(), + [&](auto it){ + return it.GetDataType() == expectedInputType; + }), + "Trying to create workload with incorrect type"); + } + armnn::DataType expectedOutputType; + + if (!info.m_OutputTensorInfos.empty()) + { + expectedOutputType = info.m_OutputTensorInfos.front().GetDataType(); + + if (!info.m_InputTensorInfos.empty()) + { + if (expectedOutputType != expectedInputType) + { + BOOST_ASSERT_MSG(false, "Trying to create workload with incorrect type"); + } + } + else if (std::find(dataTypes.begin(), dataTypes.end(), expectedOutputType) == dataTypes.end()) + { + BOOST_ASSERT_MSG(false, "Trying to create workload with incorrect type"); + } + BOOST_ASSERT_MSG(std::all_of(std::next(info.m_OutputTensorInfos.begin()), + info.m_OutputTensorInfos.end(), + [&](auto it){ + return it.GetDataType() == expectedOutputType; + }), + "Trying to create 
workload with incorrect type"); + } + } +}; + +template +class MultiTypedWorkload : public BaseWorkload +{ +public: + + MultiTypedWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info) + : BaseWorkload(descriptor, info) + { + BOOST_ASSERT_MSG(std::all_of(info.m_InputTensorInfos.begin(), + info.m_InputTensorInfos.end(), + [&](auto it){ + return it.GetDataType() == InputDataType; + }), + "Trying to create workload with incorrect type"); + BOOST_ASSERT_MSG(std::all_of(info.m_OutputTensorInfos.begin(), + info.m_OutputTensorInfos.end(), + [&](auto it){ + return it.GetDataType() == OutputDataType; + }), + "Trying to create workload with incorrect type"); + } +}; + +template +using FloatWorkload = TypedWorkload; + +template +using Float32Workload = TypedWorkload; + +template +using Uint8Workload = TypedWorkload; + +template +using Float16ToFloat32Workload = MultiTypedWorkload; + +template +using Float32ToFloat16Workload = MultiTypedWorkload; + +} //namespace armnn diff --git a/src/backends/WorkloadData.cpp b/src/backends/WorkloadData.cpp new file mode 100644 index 0000000000..c5c607d954 --- /dev/null +++ b/src/backends/WorkloadData.cpp @@ -0,0 +1,871 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "WorkloadData.hpp" + +#include "CpuTensorHandle.hpp" +#include "WorkloadInfo.hpp" + +#include +#include +#include +#include + +#include + +namespace armnn +{ + +//--------------------------------------------------------------- +DataType GetBiasDataType(DataType inputDataType) +{ + switch (inputDataType) + { + case DataType::Float16: + return DataType::Float16; + case DataType::Float32: + return DataType::Float32; + case DataType::QuantisedAsymm8: + return DataType::Signed32; + default: + BOOST_ASSERT_MSG(false, "Invalid input data type"); + return DataType::Float32; + } +} + +namespace +{ + +//--------------------------------------------------------------- +//android ndk does not support std::to_string function. +template +std::string to_string(T value) +{ + std::ostringstream os; + os << value; + return os.str(); +} + +//--------------------------------------------------------------- +void ValidatePointer(const void* ptr, std::string const& descName, std::string const& paramName) +{ + if (!ptr) + { + throw InvalidArgumentException(descName + ": Invalid null pointer. The " + + paramName + " parameter must be set."); + } +} + +//--------------------------------------------------------------- +void ValidateTensorShapesMatch(const TensorInfo& first, + const TensorInfo& second, + std::string const& descName, + std::string const& firstName, + std::string const& secondName) +{ + if (first.GetShape() != second.GetShape()) + { + throw InvalidArgumentException(descName + ": " + + firstName + " & " + secondName + " must have identical shapes"); + } +} + +//--------------------------------------------------------------- +void ValidateNoInputs(const WorkloadInfo& workloadInfo, std::string const& descName) +{ + if (workloadInfo.m_InputTensorInfos.size() != 0) + { + throw InvalidArgumentException(descName + + ": Requires no inputs. " + + to_string(workloadInfo.m_InputTensorInfos.size()) + " has been provided."); + } +} + +//--------------------------------------------------------------- +void ValidateSingleInput(const WorkloadInfo& workloadInfo, std::string const& descName) +{ + if (workloadInfo.m_InputTensorInfos.size() != 1) + { + throw InvalidArgumentException(descName + + ": Requires exactly one input. 
" + + to_string(workloadInfo.m_InputTensorInfos.size()) + " has been provided." ); + } +} + +//--------------------------------------------------------------- +void ValidateTwoInputs(const WorkloadInfo& workloadInfo, std::string const& descName) +{ + if (workloadInfo.m_InputTensorInfos.size() != 2) + { + throw InvalidArgumentException(descName + + ": Requires exactly two workloadInfo.m_InputTensorInfos. " + + to_string(workloadInfo.m_InputTensorInfos.size()) + " have been provided."); + } +} + +//--------------------------------------------------------------- +void ValidateSingleOutput(const WorkloadInfo& workloadInfo, std::string const& descName) +{ + if (workloadInfo.m_OutputTensorInfos.size() != 1) + { + throw InvalidArgumentException(descName + + ": Requires exactly one output. " + + to_string(workloadInfo.m_OutputTensorInfos.size()) + " has been provided."); + } +} + +//--------------------------------------------------------------- +void ValidateTensorNumDimensions(const TensorInfo& tensor, + std::string const& descName, + unsigned int numDimensions, + std::string const& tensorName) +{ + if (tensor.GetNumDimensions() != numDimensions) + { + throw InvalidArgumentException(descName + ": Expected " + to_string(numDimensions) + " but got " + + to_string(tensor.GetNumDimensions()) + " dimensions for " + + tensorName + " tensor."); + } +} + +//--------------------------------------------------------------- +void ValidateTensorDataType(const TensorInfo& tensor, DataType dataType, + const std::string& descName, std::string const& tensorName) +{ + if (tensor.GetDataType() != dataType) + { + throw InvalidArgumentException(descName + ": Expected data type " + GetDataTypeName(dataType) + " but got " + + GetDataTypeName(tensor.GetDataType()) + " for " + tensorName + " tensor."); + } +} + +//--------------------------------------------------------------- +void ValidateBiasTensorQuantization(const TensorInfo& biasTensor, const TensorInfo& inputTensorInfo, + const TensorInfo& weightsTensorInfo, const std::string& descName) +{ + if (biasTensor.GetQuantizationOffset() != 0) + { + throw InvalidArgumentException(descName + ": Expected zero quantization offset for bias tensor but got " + + to_string(biasTensor.GetQuantizationOffset())); + } + const float expectedScale = inputTensorInfo.GetQuantizationScale() * weightsTensorInfo.GetQuantizationScale(); + if (std::abs(biasTensor.GetQuantizationScale() - expectedScale) > 0.000000001f) + { + // Print the float values with extra precision to see very small differences + std::stringstream msg; + msg << std::setprecision(10) << descName << ": Expected " << expectedScale << + " quantization scale for bias tensor (the product of the input and weight scales), but got " << + biasTensor.GetQuantizationScale(); + throw InvalidArgumentException(msg.str()); + } +} + +//--------------------------------------------------------------- +void ValidateTensors(const std::vector& vec, + unsigned int numExpected, + const std::string& descName, + const std::string& varName) +{ + if (vec.empty() && numExpected > 0) + { + throw InvalidArgumentException(descName + ": Invalid empty " + varName + " array."); + } + + for (unsigned int i = 0; i < numExpected; ++i) + { + if (!vec[i]) + { + throw InvalidArgumentException(descName + ": Invalid NULL for " + varName + to_string(i)); + } + } +} + +//--------------------------------------------------------------- +void ValidateBroadcastTensorShapesMatch(const TensorInfo& first, + const TensorInfo& second, + const TensorInfo& output, + 
std::string const& descName, + std::string const& firstName, + std::string const& secondName) +{ + // Tensors must have the same number of dimensions in order to be explicit about which dimensions will get + // broadcasted. + if (first.GetNumDimensions() != second.GetNumDimensions()) + { + throw InvalidArgumentException(descName + ": Tensors " + + firstName + " & " + secondName + + " must have the same number of dimensions in order to be broadcasted"); + } + uint32_t numDims = first.GetNumDimensions(); + std::vector outputDims(numDims, 0u); + for (uint32_t i = 0; i < numDims; i++) + { + const bool dimsNotEqual = first.GetShape()[i] != second.GetShape()[i]; + const bool dimsNotOne = (first.GetShape()[i] != 1) && (second.GetShape()[i] != 1); + if (dimsNotEqual && dimsNotOne) + { + throw InvalidArgumentException("Broadcasting is not possible for incompatible shapes"); + } + outputDims[i] = std::max(first.GetShape()[i], second.GetShape()[i]); + } + TensorShape broadcastShape = TensorShape(boost::numeric_cast(outputDims.size()), outputDims.data()); + if (broadcastShape != output.GetShape()) + { + throw InvalidArgumentException(descName + ": The tensor shape resulting from adding " + + firstName + " & " + secondName + + " does not match the output shape"); + } +} + +//--------------------------------------------------------------- +/// Validates that the output tensor's quantization scale is greater than the product +/// of the two input tensors' quantization scales. This is a requirement of the implementation of +/// the quantized multiplication. +void ValidateTensorQuantizationMultiplier(const TensorInfo& inputTensor1, const TensorInfo& inputTensor2, + const TensorInfo& outputTensorInfo, std::string const& descName, + const std::string& inputTensor1Name, const std::string& inputTensor2Name, const std::string& outputTensorName) +{ + if (outputTensorInfo.GetDataType() == DataType::QuantisedAsymm8) + { + if (outputTensorInfo.GetQuantizationScale() <= + inputTensor1.GetQuantizationScale() * inputTensor2.GetQuantizationScale()) + { + std::stringstream msg; + msg << descName << ": Quantization scale of " << outputTensorName << " is not greater than " << + "the product of the " << inputTensor1Name << " and " << inputTensor2Name << " tensors"; + throw InvalidArgumentException(msg.str()); + } + } +} + +} //namespace + +void QueueDescriptor::ValidateInputsOutputs(const std::string& descName, + unsigned int numExpectedIn, unsigned int numExpectedOut) const +{ + ValidateTensors(m_Inputs, numExpectedIn, descName, "input"); + ValidateTensors(m_Outputs, numExpectedOut, descName, "output"); +} + +//--------------------------------------------------------------- +void MemCopyQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "MemCopyQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "MemCopyQueueDescriptor"); + + if (workloadInfo.m_InputTensorInfos.size() != workloadInfo.m_OutputTensorInfos.size()) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of input infos (%1%) does not match the number of output infos (%2%)") + % workloadInfo.m_InputTensorInfos.size() % workloadInfo.m_OutputTensorInfos.size())); + } + + for (std::size_t i = 0; i < workloadInfo.m_InputTensorInfos.size(); ++i) + { + if (workloadInfo.m_InputTensorInfos[i].GetNumElements() != + workloadInfo.m_OutputTensorInfos[i].GetNumElements()) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of elements for tensor input and output %1% does not 
match") + % i )); + } + } + + if (m_Inputs.size() != m_Outputs.size()) + { + throw InvalidArgumentException(boost::str( + boost::format("Number of inputs (%1%) does not match the number of outputs (%2%)") + % m_Inputs.size() % m_Outputs.size())); + } + + for (unsigned int i = 0; i < m_Inputs.size(); ++i) + { + if (!m_Inputs[i]) + { + throw InvalidArgumentException(boost::str(boost::format("Invalid null input %1%") % i)); + } + + if (!m_Outputs[i]) + { + throw InvalidArgumentException(boost::str(boost::format("Invalid null output %1%") % i)); + } + } +} + +//--------------------------------------------------------------- +void ActivationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "ActivationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "ActivationQueueDescriptor"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "ActivationQueueDescriptor", + "input", + "output"); +} + +//--------------------------------------------------------------- +void SoftmaxQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "SoftmaxQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "SoftmaxQueueDescriptor"); + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "SoftmaxQueueDescriptor", 2, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "SoftmaxQueueDescriptor", 2, "output"); + + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "SoftmaxQueueDescriptor", + "input", + "output"); +} + +//--------------------------------------------------------------- +void SplitterQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "SplitterQueueDescriptor"); + + if (workloadInfo.m_OutputTensorInfos.size() <= 0) + { + throw InvalidArgumentException("SplitterQueueDescriptor: At least one output needs to be provided."); + } + + if (workloadInfo.m_OutputTensorInfos.size() != m_ViewOrigins.size()) + { + throw InvalidArgumentException( + "SplitterQueueDescriptor: Number of split windows " + "has to match number of workloadInfo.m_OutputTensorInfos. " + "Number of windows: " + + to_string(m_ViewOrigins.size()) + + ". Number of workloadInfo.m_OutputTensorInfos: " + to_string(workloadInfo.m_OutputTensorInfos.size())); + } + + //The dimensionality of all the windows has to match the dimensionality (not shape) of the input. + std::size_t inputDims = workloadInfo.m_InputTensorInfos[0].GetNumDimensions(); + for(unsigned int w = 0; w < m_ViewOrigins.size(); ++w ) + { + //Checks that the dimensionality of input is same as the split windows. + ViewOrigin const& e = m_ViewOrigins[w]; + if (e.m_Origin.size() != inputDims) + { + throw InvalidArgumentException("SplitterQueueDescriptor: Window origin have to " + "have the same dimensionality as the input tensor. 
" + "Window origin (index: " + + to_string(w) + ") has " + to_string(e.m_Origin.size()) + + " dimensions, the input " + "tensor has " + + to_string(inputDims) + " dimensions."); + } + for (unsigned int i = 0; i < e.m_Origin.size(); ++i) + { + if (e.m_Origin[i] + workloadInfo.m_OutputTensorInfos[w].GetShape()[i] > + workloadInfo.m_InputTensorInfos[0].GetShape()[i]) + { + throw InvalidArgumentException("SplitterQueueDescriptor: Window extent coordinates have to " + "be smaller or equal than the size of the input in that coord."); + } + } + } +} + +//--------------------------------------------------------------- +void MergerQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleOutput(workloadInfo, "MergerQueueDescriptor"); + + if (m_Inputs.size() <= 0) + { + throw InvalidArgumentException("MergerQueueDescriptor: At least one input needs to be provided."); + } + if (m_Outputs.size() <= 0) + { + throw InvalidArgumentException("MergerQueueDescriptor: At least one output needs to be provided."); + } + + if (workloadInfo.m_InputTensorInfos.size() <= 0) + { + throw InvalidArgumentException("MergerQueueDescriptor: At least one TensorInfo input needs to be provided."); + } + if (workloadInfo.m_OutputTensorInfos.size() <= 0) + { + throw InvalidArgumentException("MergerQueueDescriptor: At least one TensorInfo output needs to be provided."); + } + + if (workloadInfo.m_InputTensorInfos.size() != m_ViewOrigins.size()) + { + throw InvalidArgumentException( + "MergerQueueDescriptor: Number of split windows " + "has to match number of workloadInfo.m_InputTensorInfos. " + "Number of windows: " + + to_string(m_ViewOrigins.size()) + + ". Number of workloadInfo.m_InputTensorInfos: " + to_string(workloadInfo.m_InputTensorInfos.size())); + } + + //The dimensionality of all the windows has to match the dimensionality (not shape) of the output. + std::size_t outputDims = workloadInfo.m_OutputTensorInfos[0].GetNumDimensions(); + for(unsigned int w = 0; w < m_ViewOrigins.size(); ++w ) + { + //Checks that the dimensionality of output is same as the split windows. + ViewOrigin const& e = m_ViewOrigins[w]; + if (e.m_Origin.size() != outputDims) + { + throw InvalidArgumentException("MergerQueueDescriptor: Window origin have to " + "have the same dimensionality as the output tensor. " + "Window origin (index: " + + to_string(w) + ") has " + to_string(e.m_Origin.size()) + + " dimensions, the output " + "tensor has " + + to_string(outputDims) + " dimensions."); + } + //Checks that the merge windows are within the output tensor. 
+ for (unsigned int i = 0; i < e.m_Origin.size(); ++i) + { + if (e.m_Origin[i] + workloadInfo.m_InputTensorInfos[w].GetShape()[i] + > workloadInfo.m_OutputTensorInfos[0].GetShape()[i]) + { + throw InvalidArgumentException("MergerQueueDescriptor: Window extent coordinates have to " + "be smaller or equal than the size of the output in that coord."); + } + } + } +} + +//--------------------------------------------------------------- +void FullyConnectedQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "FullyConnectedQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "FullyConnectedQueueDescriptor"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "FullyConnectedQueueDescriptor", 2, "output"); + + if (!(workloadInfo.m_InputTensorInfos[0].GetNumDimensions() == 2 || + workloadInfo.m_InputTensorInfos[0].GetNumDimensions() == 4)) + { + throw InvalidArgumentException("FullyConnectedQueueDescriptor: Input tensor must have 2 or 4 dimensions."); + } + + if (m_Weight == nullptr) + { + throw InvalidArgumentException("FullyConnectedQueueDescriptor: Weight tensor descriptor is missing."); + } + + ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "FullyConnectedQueueDescriptor", 2, "weight"); + + if (m_Parameters.m_BiasEnabled) + { + if (m_Bias == nullptr) + { + throw InvalidArgumentException("FullyConnectedQueueDescriptor: Bias is enabled but " + "bias value tensor descriptor is missing."); + } + + // Validates type and quantization values. + ValidateBiasTensorQuantization(m_Bias->GetTensorInfo(), + workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), "FullyConnectedQueueDescriptor"); + + ValidateTensorDataType(m_Bias->GetTensorInfo(), + GetBiasDataType(workloadInfo.m_InputTensorInfos[0].GetDataType()), + "FullyConnectedQueueDescriptor", "bias"); + + ValidateTensorNumDimensions(m_Bias->GetTensorInfo(), "FullyConnectedQueueDescriptor", 1, "bias"); + } + + ValidateTensorQuantizationMultiplier(workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), + workloadInfo.m_OutputTensorInfos[0], "FullyConnectedQueueDescriptor", "input", "weights", "output"); +} + +//--------------------------------------------------------------- +void NormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "NormalizationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "NormalizationQueueDescriptor"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "NormalizationQueueDescriptor", + "input", + "output"); +} + +void AdditionQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateTwoInputs(workloadInfo, "AdditionQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "AdditionQueueDescriptor"); + + ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_InputTensorInfos[1], + workloadInfo.m_OutputTensorInfos[0], + "AdditionQueueDescriptor", + "first input", + "second input"); + +} + +//--------------------------------------------------------------- +void MultiplicationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateTwoInputs(workloadInfo, "MultiplicationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "MultiplicationQueueDescriptor"); + + ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_InputTensorInfos[1], + workloadInfo.m_OutputTensorInfos[0], + "MultiplicationQueueDescriptor", + 
"first input", + "second input"); +} + +void BatchNormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "BatchNormalizationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "BatchNormalizationQueueDescriptor"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "BatchNormalizationQueueDescriptor", + "input", + "output"); + ValidatePointer(m_Mean, "BatchNormalizationQueueDescriptor", "mean"); + ValidatePointer(m_Variance, "BatchNormalizationQueueDescriptor", "variance"); + ValidatePointer(m_Beta, "BatchNormalizationQueueDescriptor", "beta"); + ValidatePointer(m_Gamma, "BatchNormalizationQueueDescriptor", "gamma"); + + + ValidateTensorNumDimensions(m_Mean->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "mean"); + ValidateTensorNumDimensions(m_Variance->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "variance"); + ValidateTensorNumDimensions(m_Beta->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "beta"); + ValidateTensorNumDimensions(m_Gamma->GetTensorInfo(), "BatchNormalizationQueueDescriptor", 1, "gamma"); + + ValidateTensorShapesMatch( + m_Mean->GetTensorInfo(), m_Variance->GetTensorInfo(), "BatchNormalizationQueueDescriptor", "mean", "variance"); + ValidateTensorShapesMatch( + m_Mean->GetTensorInfo(), m_Beta->GetTensorInfo(), "BatchNormalizationQueueDescriptor", "mean", "beta"); + ValidateTensorShapesMatch( + m_Mean->GetTensorInfo(), m_Gamma->GetTensorInfo(), "BatchNormalizationQueueDescriptor", "mean", "gamma"); +} + +void Convolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "Convolution2dQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "Convolution2dQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "Convolution2dQueueDescriptor", 4, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "Convolution2dQueueDescriptor", 4, "output"); + + ValidatePointer(m_Weight, "Convolution2dQueueDescriptor", "weight"); + ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "Convolution2dQueueDescriptor", 4, "weight"); + ValidateTensorDataType(m_Weight->GetTensorInfo(), workloadInfo.m_InputTensorInfos[0].GetDataType(), + "Convolution2dQueueDescriptor", "weight"); + if (m_Parameters.m_BiasEnabled) + { + ValidateTensorNumDimensions(m_Bias->GetTensorInfo(), "Convolution2dQueueDescriptor", 1, "bias"); + ValidateTensorDataType(m_Bias->GetTensorInfo(), + GetBiasDataType(workloadInfo.m_InputTensorInfos[0].GetDataType()), + "Convolution2dQueueDescriptor", "bias"); + ValidateBiasTensorQuantization(m_Bias->GetTensorInfo(), + workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), "Convolution2dQueueDescriptor"); + } + + ValidateTensorQuantizationMultiplier(workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), + workloadInfo.m_OutputTensorInfos[0], "Convolution2dQueueDescriptor", "input", "weights", "output"); +} + +void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "DepthwiseConvolution2dQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "DepthwiseConvolution2dQueueDescriptor"); + + ValidateTensorNumDimensions( + workloadInfo.m_InputTensorInfos[0], "DepthwiseConvolution2dQueueDescriptor", 4, "input"); + ValidateTensorNumDimensions( + workloadInfo.m_OutputTensorInfos[0], "DepthwiseConvolution2dQueueDescriptor", 4, "output"); + + 
ValidatePointer(m_Weight, "DepthwiseConvolution2dQueueDescriptor", "weight"); + ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor", 4, "weight"); + + //inputChannels * channelMultiplier should be equal to outputChannels. + const unsigned int numWeightChannelMultiplier = m_Weight->GetTensorInfo().GetShape()[0]; + const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[1]; + const unsigned int numWeightOutputChannels = workloadInfo.m_OutputTensorInfos[0].GetShape()[1]; + if (numWeightChannelMultiplier * numWeightInputChannels != numWeightOutputChannels) + { + throw InvalidArgumentException( + boost::str(boost::format("DepthwiseConvolution2dQueueDescriptor: output_channels (provided %1%) should be " + "equal to input_channels (provided %2%) multiplied by channel_multiplier " + "(provided %3%).") + % numWeightOutputChannels % numWeightInputChannels % numWeightChannelMultiplier)); + } + + if (m_Parameters.m_BiasEnabled) + { + ValidatePointer(m_Bias, "DepthwiseConvolution2dQueueDescriptor", "bias"); + ValidateTensorNumDimensions(m_Bias->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor", 1, "bias"); + ValidateBiasTensorQuantization(m_Bias->GetTensorInfo(), + workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor"); + + ValidateTensorDataType(m_Bias->GetTensorInfo(), + GetBiasDataType(workloadInfo.m_InputTensorInfos[0].GetDataType()), + "DepthwiseConvolution2dQueueDescriptor", "bias"); + } + + ValidateTensorQuantizationMultiplier(workloadInfo.m_InputTensorInfos[0], m_Weight->GetTensorInfo(), + workloadInfo.m_OutputTensorInfos[0], "DepthwiseConvolution2dQueueDescriptor", "input", "weights", "output"); +} + +void PermuteQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "PermuteQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "PermuteQueueDescriptor"); + + const PermutationVector& mapping = m_Parameters.m_DimMappings; + + const TensorInfo& input = workloadInfo.m_InputTensorInfos[0]; + const TensorInfo& output = workloadInfo.m_OutputTensorInfos[0]; + + ValidateTensorNumDimensions(input, "PermuteQueueDescriptor", mapping.GetSize(), "input"); + ValidateTensorNumDimensions(output, "PermuteQueueDescriptor", mapping.GetSize(), "output"); + + for (unsigned int i = 0; i < mapping.GetSize(); ++i) + { + if (input.GetShape()[i] != output.GetShape()[mapping[i]]) + { + throw InvalidArgumentException("PermuteQueueDescriptor: src dimension " + to_string(i) + + " (=" + to_string(input.GetShape()[i]) + ") " + + "must match dst dimension " + to_string(mapping[i]) + + " (=" + to_string(output.GetShape()[mapping[i]]) + ")"); + } + } +} + +void Pooling2dQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "Pooling2dQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "Pooling2dQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "Pooling2dQueueDescriptor", 4, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "Pooling2dQueueDescriptor", 4, "output"); +} + +void ResizeBilinearQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "ResizeBilinearQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "ResizeBilinearQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "ResizeBilinearQueueDescriptor", 4, "input"); + 
ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "ResizeBilinearQueueDescriptor", 4, "output"); + + // Resizes bilinear only changes width and height: batch and channel count must match. + { + const unsigned int inputBatchSize = workloadInfo.m_InputTensorInfos[0].GetShape()[0]; + const unsigned int outputBatchSize = workloadInfo.m_OutputTensorInfos[0].GetShape()[0]; + if (inputBatchSize != outputBatchSize) + { + throw InvalidArgumentException( + boost::str(boost::format("ResizeBilinearQueueDescriptor: Input batch size (%1%) " + "does not match output batch size (%2%)") % inputBatchSize % outputBatchSize)); + } + } + + { + const unsigned int inputChannelCount = workloadInfo.m_InputTensorInfos[0].GetShape()[1]; + const unsigned int outputChannelCount = workloadInfo.m_OutputTensorInfos[0].GetShape()[1]; + if (inputChannelCount != outputChannelCount) + { + throw InvalidArgumentException( + boost::str(boost::format("ResizeBilinearQueueDescriptor: Input channel count (%1%) " + "does not match output channel count (%2%)") % inputChannelCount % outputChannelCount)); + } + } +} + +void FakeQuantizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "FakeQuantizationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "FakeQuantizationQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "FakeQuantizationQueueDescriptor", 2, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "FakeQuantizationQueueDescriptor", 2, "output"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "FakeQuantizationQueueDescriptor", + "input", + "output"); + if (m_Parameters.m_Min > m_Parameters.m_Max) + { + throw InvalidArgumentException("FakeQuantizationQueueDescriptor: min cannot be greater than max"); + } + +} + +void L2NormalizationQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "L2NormalizationQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "L2NormalizationQueueDescriptor"); + + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "L2NormalizationQueueDescriptor", 4, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "L2NormalizationQueueDescriptor", 4, "output"); + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "L2NormalizationQueueDescriptor", + "input", + "output"); +} + +void ConstantQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateNoInputs(workloadInfo, "ConstantQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "ConstantQueueDescriptor"); + + if (!m_LayerOutput) + { + throw InvalidArgumentException("ConstantQueueDescriptor: No const input specified"); + } + + ValidateTensorShapesMatch(m_LayerOutput->GetTensorInfo(), + workloadInfo.m_OutputTensorInfos[0], + "ConstantQueueDescriptor", + "constant", + "output"); +} + +void ReshapeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "ReshapeQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "ReshapeQueueDescriptor"); + + if (workloadInfo.m_InputTensorInfos[0].GetNumElements() != workloadInfo.m_OutputTensorInfos[0].GetNumElements()) + { + throw InvalidArgumentException("ReshapeQueueDescriptor: Input tensor has " + + to_string(workloadInfo.m_InputTensorInfos[0].GetNumElements()) + " but output tensor has " + + 
to_string(workloadInfo.m_OutputTensorInfos[0].GetNumElements()) + " elements."); + } +} + +void FloorQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "FloorQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "FlootQueueDescriptor"); + + if (workloadInfo.m_InputTensorInfos[0] != workloadInfo.m_OutputTensorInfos[0]) + { + throw InvalidArgumentException("FloorQueueDescriptor: Input and output tensor infos do not match."); + } +} + +void LstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0], "LstmQueueDescriptor", 2, "input"); + ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0], "LstmQueueDescriptor", 2, "output"); +} + +void ConvertFp32ToFp16QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "ConvertFp32ToFp16QueueDescriptor"); + ValidateSingleOutput(workloadInfo, "ConvertFp32ToFp16QueueDescriptor"); + + if (workloadInfo.m_InputTensorInfos[0].GetDataType() != DataType::Float32) + { + throw InvalidArgumentException("ConvertFp32ToFp16QueueDescriptor: Input tensor type must be Float32."); + } + + if (workloadInfo.m_OutputTensorInfos[0].GetDataType() != DataType::Float16) + { + throw InvalidArgumentException("ConvertFp32ToFp16QueueDescriptor: Output tensor type must be Float16."); + } + + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "ConvertFp32ToFp16QueueDescriptor", + "input", + "output"); +} + +void ConvertFp16ToFp32QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "ConvertFp16ToFp32QueueDescriptor"); + ValidateSingleOutput(workloadInfo, "ConvertFp16ToFp32QueueDescriptor"); + + if (workloadInfo.m_InputTensorInfos[0].GetDataType() != DataType::Float16) + { + throw InvalidArgumentException("ConvertFp16ToFp32QueueDescriptor: Input tensor type must be Float16."); + } + if (workloadInfo.m_OutputTensorInfos[0].GetDataType() != DataType::Float32) + { + throw InvalidArgumentException("ConvertFp16ToFp32QueueDescriptor: Output tensor type must be Float32."); + } + + ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_OutputTensorInfos[0], + "ConvertFp16ToFp32QueueDescriptor", + "input", + "output"); +} + +void DivisionQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateTwoInputs(workloadInfo, "DivisionQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "DivisionQueueDescriptor"); + + ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_InputTensorInfos[1], + workloadInfo.m_OutputTensorInfos[0], + "DivisionQueueDescriptor", + "first input", + "second input"); +} + +void SubtractionQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateTwoInputs(workloadInfo, "SubtractionQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "SubtractionQueueDescriptor"); + + ValidateBroadcastTensorShapesMatch(workloadInfo.m_InputTensorInfos[0], + workloadInfo.m_InputTensorInfos[1], + workloadInfo.m_OutputTensorInfos[0], + "SubtractionQueueDescriptor", + "first input", + "second input"); +} + +void MeanQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "MeanQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "MeanQueueDescriptor"); + + const TensorInfo& input = workloadInfo.m_InputTensorInfos[0]; + const TensorInfo& output = 
workloadInfo.m_OutputTensorInfos[0]; + + if (m_Parameters.m_KeepDims) + { + ValidateTensorNumDimensions(output, "MeanQueueDescriptor", input.GetNumDimensions(), "output"); + } + else if (m_Parameters.m_Axis.empty()) + { + ValidateTensorNumDimensions(output, "MeanQueueDescriptor", 1, "output"); + } + else + { + auto outputDim = input.GetNumDimensions() - boost::numeric_cast(m_Parameters.m_Axis.size()); + ValidateTensorNumDimensions(output, + "MeanQueueDescriptor", + outputDim > 0 ? outputDim : 1, + "output"); + } +} + +void PadQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + ValidateSingleInput(workloadInfo, "PadQueueDescriptor"); + ValidateSingleOutput(workloadInfo, "PadQueueDescriptor"); + + const TensorInfo& input = workloadInfo.m_InputTensorInfos[0]; + const TensorInfo& output = workloadInfo.m_OutputTensorInfos[1]; + // input and output should have the same number of dimensions + ValidateTensorNumDimensions(output, "PadQueueDescriptor", input.GetNumDimensions(), "output"); + // there should be entry in the pad list for each dimension in the input tensor + if (m_Parameters.m_PadList.size() != input.GetNumDimensions()) { + throw InvalidArgumentException("Pad List should contain the same number of entries as there" + " are dimensions in the input tensor that is " + + to_string(input.GetNumDimensions()) + " entries " + + " not " + to_string(m_Parameters.m_PadList.size()) + " entries."); + } +} + +} //namespace armnn diff --git a/src/backends/WorkloadData.hpp b/src/backends/WorkloadData.hpp new file mode 100644 index 0000000000..b5b0402237 --- /dev/null +++ b/src/backends/WorkloadData.hpp @@ -0,0 +1,330 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "WorkloadDataFwd.hpp" + +#include "armnn/Types.hpp" +#include "armnn/Tensor.hpp" +#include "armnn/Descriptors.hpp" +#include "armnn/Exceptions.hpp" +#include "InternalTypes.hpp" +#include "OutputHandler.hpp" +#include "CpuTensorHandleFwd.hpp" + +namespace armnn +{ + +//A helper function that returns the bias data type required for given input data type. +DataType GetBiasDataType(DataType inputDataType); + +struct WorkloadInfo; + +struct QueueDescriptor +{ + std::vector m_Inputs; + std::vector m_Outputs; + + void ValidateInputsOutputs(const std::string& descName, + unsigned int numExpectedIn, unsigned int numExpectedOut) const; + + +protected: + ~QueueDescriptor() = default; + QueueDescriptor() = default; + QueueDescriptor(QueueDescriptor const&) = default; + QueueDescriptor& operator=(QueueDescriptor const&) = default; +}; + +// Base class for queue descriptors which contain parameters. +template +struct QueueDescriptorWithParameters : public QueueDescriptor +{ + LayerDescriptor m_Parameters; + +protected: + ~QueueDescriptorWithParameters() = default; + QueueDescriptorWithParameters() = default; + QueueDescriptorWithParameters(QueueDescriptorWithParameters const&) = default; + QueueDescriptorWithParameters& operator=(QueueDescriptorWithParameters const&) = default; +}; + +struct MemCopyQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +using InputQueueDescriptor = MemCopyQueueDescriptor; +using OutputQueueDescriptor = MemCopyQueueDescriptor; + +// Softmax layer workload data. +struct SoftmaxQueueDescriptor : QueueDescriptorWithParameters +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Splitter layer workload data. 
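[Editor's note] In WorkloadData.cpp above, the Addition, Multiplication, Division and Subtraction validations all defer to ValidateBroadcastTensorShapesMatch, which derives the expected output shape one dimension at a time: two extents are compatible when they are equal or either of them is 1, and the output keeps the larger one. A standalone sketch of that rule (the ranks are assumed to match, as the validator requires):

#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <vector>

// Dimension-wise broadcast rule: equal, or one side is 1; the output keeps the larger extent.
std::vector<unsigned> BroadcastShape(const std::vector<unsigned>& a, const std::vector<unsigned>& b)
{
    if (a.size() != b.size())
    {
        throw std::invalid_argument("Shapes must have the same rank to be broadcast");
    }
    std::vector<unsigned> out(a.size());
    for (std::size_t i = 0; i < a.size(); ++i)
    {
        if (a[i] != b[i] && a[i] != 1 && b[i] != 1)
        {
            throw std::invalid_argument("Incompatible dimensions for broadcasting");
        }
        out[i] = std::max(a[i], b[i]);
    }
    return out;
}

// Example: {1, 4, 1, 5} and {3, 4, 2, 5} broadcast to {3, 4, 2, 5}.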
+struct SplitterQueueDescriptor : QueueDescriptorWithParameters +{ + struct ViewOrigin + { + ViewOrigin() {} + ViewOrigin(std::vector const& origin) : m_Origin(origin) {} + + //View origin (size of the vector is the same as number of dimensions of the view). + std::vector m_Origin; + }; + + //View defines a tensor that will be carved from the input tensor. + //View origins are stored here, the extents are defined by sizes of the output tensors. + std::vector m_ViewOrigins; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Merger layer workload data. +struct MergerQueueDescriptor : QueueDescriptorWithParameters +{ + struct ViewOrigin + { + ViewOrigin() {} + ViewOrigin(const std::vector& origin) : m_Origin(origin) {} + + //View origin (size of the vector is the same as number of dimensions of the view). + std::vector m_Origin; + }; + + //View defines a sub-area of the output tensor that will be filled with the corresponding input tensor. + //View origins are stored here, the extents are defined by sizes of the input tensors. + std::vector m_ViewOrigins; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Activation layer workload data. +struct ActivationQueueDescriptor : QueueDescriptorWithParameters +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Fully connected layer workload data. +struct FullyConnectedQueueDescriptor : QueueDescriptorWithParameters +{ + FullyConnectedQueueDescriptor() + : m_Weight(nullptr) + , m_Bias(nullptr) + { + } + + const ConstCpuTensorHandle* m_Weight; + const ConstCpuTensorHandle* m_Bias; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Permute layer workload data. +struct PermuteQueueDescriptor : QueueDescriptorWithParameters +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Pooling 2D layer workload data. +struct Pooling2dQueueDescriptor : QueueDescriptorWithParameters +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Convolution 2D layer workload data. +struct Convolution2dQueueDescriptor : QueueDescriptorWithParameters +{ + Convolution2dQueueDescriptor() + : m_Weight(nullptr) + , m_Bias(nullptr) + { + } + + const ConstCpuTensorHandle* m_Weight; + const ConstCpuTensorHandle* m_Bias; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Depthwise Convolution 2D layer workload data. +struct DepthwiseConvolution2dQueueDescriptor : QueueDescriptorWithParameters +{ + DepthwiseConvolution2dQueueDescriptor() + : m_Weight(nullptr) + , m_Bias(nullptr) + { + } + + const ConstCpuTensorHandle* m_Weight; + const ConstCpuTensorHandle* m_Bias; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Normalization layer workload data. +struct NormalizationQueueDescriptor : QueueDescriptorWithParameters +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Add layer workload data. +struct AdditionQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Multiplication layer workload data. +struct MultiplicationQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Division layer workload data. +struct DivisionQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Subtraction layer workload data. +struct SubtractionQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Mean layer workload data. 
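[Editor's note] The splitter and merger descriptors above describe their views with a ViewOrigin plus the shape of the corresponding output or input tensor, and the Validate implementations earlier in WorkloadData.cpp check that every such window stays inside its parent: origin plus extent must not exceed the parent extent in any dimension. A standalone sketch of that containment test:

#include <cstddef>
#include <vector>

// Returns true when a window of shape 'viewShape' placed at 'origin' fits inside 'parentShape'.
// All three vectors are assumed to have the same rank.
bool WindowFitsInside(const std::vector<unsigned>& origin,
                      const std::vector<unsigned>& viewShape,
                      const std::vector<unsigned>& parentShape)
{
    if (origin.size() != viewShape.size() || origin.size() != parentShape.size())
    {
        return false;
    }
    for (std::size_t i = 0; i < origin.size(); ++i)
    {
        if (origin[i] + viewShape[i] > parentShape[i])
        {
            return false;
        }
    }
    return true;
}

// Example: origin {0, 2, 0, 0} with view {1, 2, 8, 8} fits inside a {1, 4, 8, 8} parent.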
+struct MeanQueueDescriptor : QueueDescriptorWithParameters +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Pad layer workload data +struct PadQueueDescriptor : QueueDescriptorWithParameters +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +// Batch norm layer workload data. +struct BatchNormalizationQueueDescriptor : QueueDescriptorWithParameters +{ + BatchNormalizationQueueDescriptor() + : m_Mean(nullptr) + , m_Variance(nullptr) + , m_Beta(nullptr) + , m_Gamma(nullptr) + { + } + + const ConstCpuTensorHandle* m_Mean; + const ConstCpuTensorHandle* m_Variance; + const ConstCpuTensorHandle* m_Beta; + const ConstCpuTensorHandle* m_Gamma; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct ResizeBilinearQueueDescriptor : QueueDescriptorWithParameters +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct FakeQuantizationQueueDescriptor : QueueDescriptorWithParameters +{ + FakeQuantizationQueueDescriptor() + : m_Min(nullptr) + , m_Max(nullptr) + { + } + + const ConstCpuTensorHandle* m_Min; + const ConstCpuTensorHandle* m_Max; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct L2NormalizationQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct ConstantQueueDescriptor : QueueDescriptor +{ + ConstantQueueDescriptor() + : m_LayerOutput(nullptr) + { + } + + const ConstCpuTensorHandle* m_LayerOutput; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct ReshapeQueueDescriptor : QueueDescriptorWithParameters +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct FloorQueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct LstmQueueDescriptor : QueueDescriptorWithParameters +{ + LstmQueueDescriptor() + : m_InputToInputWeights(nullptr) + , m_InputToForgetWeights(nullptr) + , m_InputToCellWeights(nullptr) + , m_InputToOutputWeights(nullptr) + , m_RecurrentToInputWeights(nullptr) + , m_RecurrentToForgetWeights(nullptr) + , m_RecurrentToCellWeights(nullptr) + , m_RecurrentToOutputWeights(nullptr) + , m_CellToInputWeights(nullptr) + , m_CellToForgetWeights(nullptr) + , m_CellToOutputWeights(nullptr) + , m_InputGateBias(nullptr) + , m_ForgetGateBias(nullptr) + , m_CellBias(nullptr) + , m_OutputGateBias(nullptr) + , m_ProjectionWeights(nullptr) + , m_ProjectionBias(nullptr) + { + } + + const ConstCpuTensorHandle* m_InputToInputWeights; + const ConstCpuTensorHandle* m_InputToForgetWeights; + const ConstCpuTensorHandle* m_InputToCellWeights; + const ConstCpuTensorHandle* m_InputToOutputWeights; + const ConstCpuTensorHandle* m_RecurrentToInputWeights; + const ConstCpuTensorHandle* m_RecurrentToForgetWeights; + const ConstCpuTensorHandle* m_RecurrentToCellWeights; + const ConstCpuTensorHandle* m_RecurrentToOutputWeights; + const ConstCpuTensorHandle* m_CellToInputWeights; + const ConstCpuTensorHandle* m_CellToForgetWeights; + const ConstCpuTensorHandle* m_CellToOutputWeights; + const ConstCpuTensorHandle* m_InputGateBias; + const ConstCpuTensorHandle* m_ForgetGateBias; + const ConstCpuTensorHandle* m_CellBias; + const ConstCpuTensorHandle* m_OutputGateBias; + const ConstCpuTensorHandle* m_ProjectionWeights; + const ConstCpuTensorHandle* m_ProjectionBias; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct ConvertFp16ToFp32QueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +struct 
ConvertFp32ToFp16QueueDescriptor : QueueDescriptor +{ + void Validate(const WorkloadInfo& workloadInfo) const; +}; + +} //namespace armnn diff --git a/src/backends/WorkloadDataCollector.hpp b/src/backends/WorkloadDataCollector.hpp new file mode 100644 index 0000000000..ac8c2e2ab9 --- /dev/null +++ b/src/backends/WorkloadDataCollector.hpp @@ -0,0 +1,36 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include + +namespace armnn +{ +class ITensorHandle; + +class WorkloadDataCollector +{ +public: + WorkloadDataCollector(std::vector& handles, std::vector& infos) + : m_Handles(handles) + , m_Infos(infos) + { + } + + void Push(ITensorHandle* handle, const TensorInfo& info) + { + m_Handles.push_back(handle); + m_Infos.push_back(info); + } + +private: + std::vector& m_Handles; + std::vector& m_Infos; +}; + + +} //namespace armnn diff --git a/src/backends/WorkloadDataFwd.hpp b/src/backends/WorkloadDataFwd.hpp new file mode 100644 index 0000000000..9ae20e0ce1 --- /dev/null +++ b/src/backends/WorkloadDataFwd.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +namespace armnn +{ + +struct QueueDescriptor; +template +struct QueueDescriptorWithParameters; +struct SoftmaxQueueDescriptor; +struct SplitterQueueDescriptor; +struct MergerQueueDescriptor; +struct ActivationQueueDescriptor; +struct FullyConnectedQueueDescriptor; +struct PermuteQueueDescriptor; +struct Pooling2dQueueDescriptor; +struct Convolution2dQueueDescriptor; +struct NormalizationQueueDescriptor; +struct MultiplicationQueueDescriptor; +struct BatchNormalizationQueueDescriptor; +struct FakeQuantizationQueueDescriptor; +struct ReshapeQueueDescriptor; + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/WorkloadFactory.cpp b/src/backends/WorkloadFactory.cpp new file mode 100644 index 0000000000..773a8c1a18 --- /dev/null +++ b/src/backends/WorkloadFactory.cpp @@ -0,0 +1,571 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
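[Editor's note] WorkloadDataCollector above is a small helper that appends a tensor handle and its TensorInfo to two caller-owned vectors in lock-step, keeping handles and infos index-aligned. A minimal standalone analogue of that pattern (illustrative names, plain std types):

#include <string>
#include <vector>

// Collector holding references to two parallel vectors owned by the caller;
// Push appends to both so index i always pairs a handle with its description.
class PairCollector
{
public:
    PairCollector(std::vector<void*>& handles, std::vector<std::string>& infos)
        : m_Handles(handles)
        , m_Infos(infos)
    {
    }

    void Push(void* handle, const std::string& info)
    {
        m_Handles.push_back(handle);
        m_Infos.push_back(info);
    }

private:
    std::vector<void*>& m_Handles;
    std::vector<std::string>& m_Infos;
};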
+// SPDX-License-Identifier: MIT +// +#include "WorkloadFactory.hpp" +#include "RefWorkloadFactory.hpp" +#include "NeonWorkloadFactory.hpp" +#include "ClWorkloadFactory.hpp" + +#include "armnn/Types.hpp" +#include "armnn/LayerSupport.hpp" +#include "Layer.hpp" +#include "LayersFwd.hpp" +#include "CpuTensorHandle.hpp" + +#include +#include +#include + +namespace armnn +{ + +namespace +{ + const TensorInfo OverrideDataType(const TensorInfo& info, boost::optional type) + { + if (type == boost::none) + { + return info; + } + + return TensorInfo(info.GetShape(), type.get(), info.GetQuantizationScale(), info.GetQuantizationOffset()); + } + + boost::optional GetBiasTypeFromWeightsType(boost::optional weightsType) + { + if (weightsType == boost::none) + { + return weightsType; + } + + switch(weightsType.get()) + { + case DataType::Float16: + case DataType::Float32: + return weightsType; + case DataType::QuantisedAsymm8: + return DataType::Signed32; + default: + BOOST_ASSERT_MSG(false, "GetBiasTypeFromWeightsType(): Unsupported data type."); + } + return boost::none; + } +} + +bool IWorkloadFactory::IsLayerSupported(Compute compute, const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported) +{ + constexpr size_t reasonCapacity = 1024; + char reason[reasonCapacity]; + bool result; + switch(layer.GetType()) + { + case LayerType::Activation: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsActivationSupported(compute, + OverrideDataType(input, dataType), + OverrideDataType(output, dataType), + cLayer->GetParameters(), + reason, + reasonCapacity); + break; + } + case LayerType::Addition: + { + const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsAdditionSupported(compute, + OverrideDataType(input0, dataType), + OverrideDataType(input1, dataType), + OverrideDataType(output, dataType), + reason, + reasonCapacity); + break; + } + case LayerType::BatchNormalization: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + const TensorInfo& mean = cLayer->m_Mean->GetTensorInfo(); + const TensorInfo& var = cLayer->m_Variance->GetTensorInfo(); + const TensorInfo& beta = cLayer->m_Beta->GetTensorInfo(); + const TensorInfo& gamma = cLayer->m_Gamma->GetTensorInfo(); + result = IsBatchNormalizationSupported(compute, + OverrideDataType(input, dataType), + OverrideDataType(output, dataType), + OverrideDataType(mean, dataType), + OverrideDataType(var, dataType), + OverrideDataType(beta, dataType), + OverrideDataType(gamma, dataType), + cLayer->GetParameters(), + reason, reasonCapacity); + break; + } + case LayerType::Constant: + { + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsConstantSupported(compute, OverrideDataType(output, dataType), reason, reasonCapacity); + break; + } + case LayerType::ConvertFp16ToFp32: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsConvertFp16ToFp32Supported(compute, input, output, reason, 
reasonCapacity); + break; + } + case LayerType::ConvertFp32ToFp16: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsConvertFp32ToFp16Supported(compute, input, output, reason, reasonCapacity); + break; + } + case LayerType::Convolution2d: + { + auto cLayer = boost::polymorphic_downcast(&layer); + + const TensorInfo input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(), + dataType); + const TensorInfo output = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType); + BOOST_ASSERT(cLayer->m_Weight.get() != nullptr); + + const Convolution2dDescriptor& descriptor = cLayer->GetParameters(); + + // Construct optional biases object based on the value of m_BiasEnabled + boost::optional biases(boost::none); + if (descriptor.m_BiasEnabled) + { + biases = boost::make_optional( + OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType))); + } + + result = IsConvolution2dSupported(compute, + input, + output, + descriptor, + OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType), + biases, + reason, + reasonCapacity); + break; + } + case LayerType::MemCopy: + { + // MemCopy supported for CpuRef, CpuAcc and GpuAcc backends, + // (also treat Undefined as CpuRef to avoid breaking lots of Unit tests). + result = compute == Compute::CpuRef || compute == Compute::Undefined + || compute == Compute::CpuAcc || compute == Compute::GpuAcc; + strcpy(reason, "Unsupported backend type"); + break; + } + case LayerType::DepthwiseConvolution2d: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(), + dataType); + const TensorInfo& output = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType); + BOOST_ASSERT(cLayer->m_Weight.get() != nullptr); + + const DepthwiseConvolution2dDescriptor& descriptor = cLayer->GetParameters(); + + // Construct optional biases object based on the value of m_BiasEnabled + boost::optional biases(boost::none); + if (descriptor.m_BiasEnabled) + { + biases = boost::make_optional( + OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType))); + } + + result = IsDepthwiseConvolutionSupported(compute, + input, + output, + descriptor, + OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType), + biases, + reason, + reasonCapacity); + break; + } + case LayerType::FakeQuantization: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsFakeQuantizationSupported(compute, OverrideDataType(input, dataType), cLayer->GetParameters(), + reason, reasonCapacity); + break; + } + case LayerType::Floor: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsFloorSupported(compute, OverrideDataType(input, dataType), OverrideDataType(output, dataType), + reason, reasonCapacity); + break; + } + case LayerType::FullyConnected: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + BOOST_ASSERT(cLayer->m_Weight.get() != nullptr); + + TensorInfo biasInfo; + const TensorInfo * biasInfoPtr = nullptr; + static const 
TensorInfo dummyFloat16Bias(TensorShape({1,1,1,1}), DataType::Float16); + static const TensorInfo dummyFloat32Bias(TensorShape({1,1,1,1}), DataType::Float32); + static const TensorInfo dummyQA8Bias(TensorShape({1,1,1,1}), DataType::Signed32); + + const FullyConnectedDescriptor& descriptor = cLayer->GetParameters(); + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(cLayer->m_Bias.get() != nullptr); + biasInfo = OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType)); + biasInfoPtr = &biasInfo; + } + else + { + // If biases are not enabled pass a dummy tensorinfo for the validation + switch(input.GetDataType()) + { + case DataType::Float16: + { + biasInfoPtr = &dummyFloat16Bias; + break; + } + case DataType::Float32: + { + biasInfoPtr = &dummyFloat32Bias; + break; + } + case DataType::QuantisedAsymm8: + { + biasInfoPtr = &dummyQA8Bias; + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unexpected bias type"); + } + } + } + + result = IsFullyConnectedSupported(compute, + OverrideDataType(input, dataType), + OverrideDataType(output, dataType), + OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType), + *biasInfoPtr, + descriptor, + reason, + reasonCapacity); + break; + } + case LayerType::Input: + { + const TensorInfo& input = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsInputSupported(compute, OverrideDataType(input, dataType), reason, reasonCapacity); + break; + } + case LayerType::L2Normalization: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsL2NormalizationSupported(compute, OverrideDataType(input, dataType), + OverrideDataType(output, dataType), reason, reasonCapacity); + break; + } + case LayerType::Lstm: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const LstmDescriptor& descriptor = cLayer->GetParameters(); + + // All inputs. 
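+            // (Each tensor gathered here is passed through OverrideDataType, defined in the
+            // anonymous namespace at the top of this file, so the support query is made for the
+            // caller-requested data type - for example when probing whether an Fp32-described
+            // network could also run as Fp16 - rather than for the type recorded on the layer.)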
+ const TensorInfo& input = OverrideDataType(layer.GetInputSlot(0).GetConnection()->GetTensorInfo(), + dataType); + const TensorInfo& outputStateIn = OverrideDataType(layer.GetInputSlot(1).GetConnection()->GetTensorInfo(), + dataType); + const TensorInfo& cellStateIn = OverrideDataType(layer.GetInputSlot(2).GetConnection()->GetTensorInfo(), + dataType); + // All outputs + const TensorInfo& scratchBuffer = OverrideDataType(layer.GetOutputSlot(0).GetTensorInfo(), dataType); + const TensorInfo& outputStateOut = OverrideDataType(layer.GetOutputSlot(1).GetTensorInfo(), dataType); + const TensorInfo& cellStateOut = OverrideDataType(layer.GetOutputSlot(2).GetTensorInfo(), dataType); + const TensorInfo& output = OverrideDataType(layer.GetOutputSlot(3).GetTensorInfo(), dataType); + + // Basic parameters + const TensorInfo& inputToForgetWeights + = OverrideDataType(cLayer->m_BasicParameters.m_InputToForgetWeights->GetTensorInfo(), dataType); + const TensorInfo& inputToCellWeights + = OverrideDataType(cLayer->m_BasicParameters.m_InputToCellWeights->GetTensorInfo(), dataType); + const TensorInfo& inputToOutputWeights + = OverrideDataType(cLayer->m_BasicParameters.m_InputToOutputWeights->GetTensorInfo(), dataType); + const TensorInfo& recurrentToForgetWeights + = OverrideDataType(cLayer->m_BasicParameters.m_RecurrentToForgetWeights->GetTensorInfo(), dataType); + const TensorInfo& recurrentToCellWeights + = OverrideDataType(cLayer->m_BasicParameters.m_RecurrentToCellWeights->GetTensorInfo(), dataType); + const TensorInfo& recurrentToOutputWeights + = OverrideDataType(cLayer->m_BasicParameters.m_RecurrentToOutputWeights->GetTensorInfo(), dataType); + const TensorInfo& forgetGateBias + = OverrideDataType(cLayer->m_BasicParameters.m_ForgetGateBias->GetTensorInfo(), dataType); + const TensorInfo& cellBias + = OverrideDataType(cLayer->m_BasicParameters.m_CellBias->GetTensorInfo(), dataType); + const TensorInfo& outputGateBias + = OverrideDataType(cLayer->m_BasicParameters.m_OutputGateBias->GetTensorInfo(), dataType); + + // Optional parameters + const TensorInfo* inputToInputWeights = nullptr; + const TensorInfo* recurrentToInputWeights = nullptr; + const TensorInfo* cellToInputWeights = nullptr; + const TensorInfo* inputGateBias = nullptr; + const TensorInfo* projectionWeights = nullptr; + const TensorInfo* projectionBias = nullptr; + const TensorInfo* cellToForgetWeights = nullptr; + const TensorInfo* cellToOutputWeights = nullptr; + + TensorInfo optInputToInputWeights; + TensorInfo optRecurrentToInputWeights; + TensorInfo optCellToInputWeights; + TensorInfo optInputGateBias; + TensorInfo optProjectionWeights; + TensorInfo optProjectionBias; + TensorInfo optCellToForgetWeights; + TensorInfo optCellToOutputWeights; + + if(!descriptor.m_CifgEnabled) + { + optInputToInputWeights = + OverrideDataType(cLayer->m_CifgParameters.m_InputToInputWeights->GetTensorInfo(), dataType); + inputToInputWeights = &optInputToInputWeights; + + optRecurrentToInputWeights = + OverrideDataType(cLayer->m_CifgParameters.m_RecurrentToInputWeights->GetTensorInfo(), dataType); + recurrentToInputWeights = &optRecurrentToInputWeights; + if (cLayer->m_CifgParameters.m_CellToInputWeights != nullptr) + { + optCellToInputWeights = + OverrideDataType(cLayer->m_CifgParameters.m_CellToInputWeights->GetTensorInfo(), dataType); + cellToInputWeights = &optCellToInputWeights; + } + optInputGateBias = + OverrideDataType(cLayer->m_CifgParameters.m_InputGateBias->GetTensorInfo(), dataType); + inputGateBias = &optInputGateBias; + } + + 
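+            // The same storage-plus-pointer pattern repeats for the projection and peephole
+            // parameters below: the opt* locals provide stable storage for the overridden
+            // TensorInfo, and the corresponding pointer stays nullptr whenever the feature is
+            // disabled, which is how IsLstmSupported distinguishes absent optional tensors.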
if(descriptor.m_ProjectionEnabled) + { + optProjectionWeights = + OverrideDataType(cLayer->m_ProjectionParameters.m_ProjectionWeights->GetTensorInfo(), dataType); + projectionWeights = &optProjectionWeights; + if (cLayer->m_ProjectionParameters.m_ProjectionBias != nullptr) + { + optProjectionBias = + OverrideDataType(cLayer->m_ProjectionParameters.m_ProjectionBias->GetTensorInfo(), dataType); + projectionBias = &optProjectionBias; + } + } + + if(descriptor.m_PeepholeEnabled) + { + optCellToForgetWeights = + OverrideDataType(cLayer->m_PeepholeParameters.m_CellToForgetWeights->GetTensorInfo(), dataType); + cellToForgetWeights = &optCellToForgetWeights; + optCellToOutputWeights = + OverrideDataType(cLayer->m_PeepholeParameters.m_CellToOutputWeights->GetTensorInfo(), dataType); + cellToOutputWeights = &optCellToOutputWeights; + } + + result = IsLstmSupported(compute, + input, + outputStateIn, + cellStateIn, + scratchBuffer, + outputStateOut, + cellStateOut, + output, + descriptor, + inputToForgetWeights, + inputToCellWeights, + inputToOutputWeights, + recurrentToForgetWeights, + recurrentToCellWeights, + recurrentToOutputWeights, + forgetGateBias, + cellBias, + outputGateBias, + inputToInputWeights, + recurrentToInputWeights, + cellToInputWeights, + inputGateBias, + projectionWeights, + projectionBias, + cellToForgetWeights, + cellToOutputWeights, + reason, + reasonCapacity); + break; + } + case LayerType::Merger: + { + auto cLayer = boost::polymorphic_downcast(&layer); + + // Get vector of all inputs. + auto getTensorInfo = [&dataType](const InputSlot& slot) + { + return OverrideDataType(slot.GetConnectedOutputSlot()->GetTensorInfo(), dataType); + }; + auto beginI = boost::make_transform_iterator(layer.GetInputSlots().begin(), getTensorInfo); + auto endI = boost::make_transform_iterator(layer.GetInputSlots().end(), getTensorInfo); + std::vector inputs(beginI, endI); + + auto getTensorInfoPtr = [](const TensorInfo& info) + { + return &info; + }; + auto beginPtr = boost::make_transform_iterator(inputs.begin(), getTensorInfoPtr); + auto endPtr = boost::make_transform_iterator(inputs.end(), getTensorInfoPtr); + std::vector inputPtrs(beginPtr, endPtr); + + result = IsMergerSupported(compute, inputPtrs, cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Multiplication: + { + const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsMultiplicationSupported(compute, + OverrideDataType(input0, dataType), + OverrideDataType(input1, dataType), + OverrideDataType(output, dataType), + reason, + reasonCapacity); + break; + } + case LayerType::Normalization: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsNormalizationSupported(compute, OverrideDataType(input, dataType), + OverrideDataType(output, dataType), cLayer->GetParameters(), reason, + reasonCapacity); + break; + } + case LayerType::Output: + { + const TensorInfo& output = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsOutputSupported(compute, OverrideDataType(output, dataType), reason, reasonCapacity); + break; + } + case LayerType::Permute: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = 
layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsPermuteSupported(compute, OverrideDataType(input, dataType), OverrideDataType(output, dataType), + cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Pooling2d: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsPooling2dSupported(compute, OverrideDataType(input, dataType), + OverrideDataType(output, dataType), cLayer->GetParameters(), reason, + reasonCapacity); + break; + } + case LayerType::Division: + { + const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsDivisionSupported(compute, + OverrideDataType(input0, dataType), + OverrideDataType(input1, dataType), + OverrideDataType(output, dataType), + reason, + reasonCapacity); + break; + } + case LayerType::Reshape: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsReshapeSupported(compute, OverrideDataType(input, dataType), reason, reasonCapacity); + break; + } + case LayerType::ResizeBilinear: + { + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsResizeBilinearSupported(compute, OverrideDataType(input, dataType), reason, reasonCapacity); + break; + } + case LayerType::Softmax: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsSoftmaxSupported(compute, OverrideDataType(input, dataType), OverrideDataType(output, dataType), + cLayer->GetParameters(), reason, reasonCapacity); + break; + } + case LayerType::Splitter: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = IsSplitterSupported(compute, OverrideDataType(input, dataType), cLayer->GetParameters(), reason, + reasonCapacity); + break; + } + case LayerType::Subtraction: + { + const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsSubtractionSupported(compute, + OverrideDataType(input0, dataType), + OverrideDataType(input1, dataType), + OverrideDataType(output, dataType), + reason, + reasonCapacity); + break; + } + case LayerType::Mean: + { + auto cLayer = boost::polymorphic_downcast(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + result = IsMeanSupported(compute, + OverrideDataType(input, dataType), + OverrideDataType(output, dataType), + cLayer->GetParameters(), + reason, + reasonCapacity); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "WorkloadFactory did not recognise type of layer."); + strcpy(reason, "Unrecognised layer type"); + result = false; + break; + } + } + outReasonIfUnsupported = reason; + return result; +} + +bool IWorkloadFactory::IsLayerSupported(const Layer& layer, 
boost::optional dataType, + std::string& outReasonIfUnsupported) +{ + return IsLayerSupported(layer.GetComputeDevice(), layer, dataType, outReasonIfUnsupported); +} + +} diff --git a/src/backends/WorkloadFactory.hpp b/src/backends/WorkloadFactory.hpp new file mode 100644 index 0000000000..fbc6134574 --- /dev/null +++ b/src/backends/WorkloadFactory.hpp @@ -0,0 +1,137 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Workload.hpp" +#include +#include "armnn/TensorFwd.hpp" +#include "OutputHandler.hpp" +#include + +namespace armnn +{ + +class Layer; + +// Workload factory interface for compute backends. +class IWorkloadFactory +{ +public: + virtual ~IWorkloadFactory() { } + + virtual Compute GetCompute() const = 0; + + /// Informs the memory manager that the network is finalized and ready for execution. + virtual void Finalize() { } + + /// Inform the memory manager to release the memory + virtual void Release() { } + + /// Inform the memory manager to acquire memory + virtual void Acquire() { } + + static bool IsLayerSupported(Compute compute, const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported); + static bool IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const = 0; + + virtual std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin + ) const = 0; + + virtual std::unique_ptr CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo) const = 0; + + virtual std::unique_ptr CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + 
const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& Info) const = 0; + + virtual std::unique_ptr CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& Info) const = 0; +}; + +} //namespace armnn diff --git a/src/backends/WorkloadInfo.hpp b/src/backends/WorkloadInfo.hpp new file mode 100644 index 0000000000..304bc0bf06 --- /dev/null +++ b/src/backends/WorkloadInfo.hpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +namespace armnn +{ + +/// Contains information about inputs and outputs to a layer. +/// This is needed at construction of workloads, but are not stored. +struct WorkloadInfo +{ + std::vector m_InputTensorInfos; + std::vector m_OutputTensorInfos; +}; + +} //namespace armnn diff --git a/src/backends/WorkloadUtils.hpp b/src/backends/WorkloadUtils.hpp new file mode 100644 index 0000000000..65c58eabd9 --- /dev/null +++ b/src/backends/WorkloadUtils.hpp @@ -0,0 +1,139 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "armnn/Tensor.hpp" +#include "ITensorHandle.hpp" + +#include + +namespace armnn +{ +namespace +{ +template +void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg) +{ + if (idx >= num) + { + return; + } + + arg = array[(num - 1) - idx]; + idx++; +}; + +template +void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args& ... 
args) +{ + AssignValues(num, idx, array, assignee); + + AssignValues(num, idx, array, args...); +} +} // namespace + +template +void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy) +{ + static_assert(MaxNumOfTensorDimensions == 4, "Please update CopyTensorContents"); + + TensorShape srcStrides = srcTensor->GetStrides(); + const TensorShape& srcShape = srcTensor->GetShape(); + TensorShape dstStrides = dstTensor->GetStrides(); + const TensorShape& dstShape = dstTensor->GetShape(); + + size_t srcBatches = 1; + size_t srcChannels = 1; + size_t srcHeight = 1; + size_t srcWidth = 1; + AssignValues(srcShape.GetNumDimensions(),0, srcShape, + srcWidth, + srcHeight, + srcChannels, + srcBatches); + + size_t srcBatchStride = 0; + size_t srcChannelStride = 0; + size_t srcHeightStride = 0; + size_t srcWidthStride = 0; + AssignValues(srcStrides.GetNumDimensions(),0, srcStrides, + srcWidthStride, + srcHeightStride, + srcChannelStride, + srcBatchStride); + + size_t dstBatches = 1; + size_t dstChannels = 1; + size_t dstHeight = 1; + size_t dstWidth = 1; + AssignValues(dstShape.GetNumDimensions(),0, dstShape, + dstWidth, + dstHeight, + dstChannels, + dstBatches); + + size_t dstBatchStride = 0; + size_t dstChannelStride = 0; + size_t dstHeightStride = 0; + size_t dstWidthStride = 0; + AssignValues(dstStrides.GetNumDimensions(),0, dstStrides, + dstWidthStride, + dstHeightStride, + dstChannelStride, + dstBatchStride); + + auto srcData = static_cast(srcTensor->Map()); + auto dstData = static_cast(dstTensor->Map()); + + size_t copyLength = std::min(srcWidth*srcWidthStride, dstWidth*dstWidthStride); + size_t copyHeight = std::min(srcHeight, dstHeight); + size_t copyChannels = std::min(srcChannels, dstChannels); + size_t copyBatches = std::min(srcBatches, dstBatches); + + for(unsigned int b=0; b < copyBatches; ++b) + { + auto srcPtrBatch = srcData; + auto dstPtrBatch = dstData; + for (unsigned int c=0; c< copyChannels; ++c) + { + auto srcPtrChannel = srcData; + auto dstPtrChannel = dstData; + for (unsigned int h=0; h < copyHeight; ++h) + { + copy(dstData, srcData, copyLength); + dstData += dstHeightStride; + srcData += srcHeightStride; + } + dstData += (static_cast(dstChannelStride) - (dstData - dstPtrChannel)); + srcData += (static_cast(srcChannelStride) - (srcData - srcPtrChannel)); + } + dstData += (static_cast(dstBatchStride)-(dstData - dstPtrBatch)); + srcData += (static_cast(srcBatchStride)-(srcData - srcPtrBatch)); + } + + srcTensor->Unmap(); + dstTensor->Unmap(); +} + +template +void GatherTensorHandlePairs(const DescriptorType& descriptor, + std::vector>& tensorHandlePairs) +{ + const unsigned int numInputs = static_cast(descriptor.m_Inputs.size()); + tensorHandlePairs.reserve(numInputs); + + for (unsigned int i = 0; i < numInputs; ++i) + { + SrcTensorHandleType* const srcTensorHandle = boost::polymorphic_downcast( + descriptor.m_Inputs[i]); + DstTensorHandleType* const dstTensorHandle = boost::polymorphic_downcast( + descriptor.m_Outputs[i]); + + tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle); + } +} + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/backends.cmake b/src/backends/backends.cmake new file mode 100644 index 0000000000..462365c252 --- /dev/null +++ b/src/backends/backends.cmake @@ -0,0 +1,15 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. 
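The AssignValues helpers above unpack a TensorShape into named width/height/channel/batch variables in reverse order, so the first variable listed receives the innermost (last) dimension. A standalone sketch, with the stripped template parameters filled in by inference, shows the effect:

#include <cassert>

namespace
{
template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    if (idx >= num)
    {
        return;
    }
    arg = array[(num - 1) - idx];
    idx++;
}

template <typename ArrayType, typename T, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    AssignValues(num, idx, array, assignee); // also advances idx for the recursive call below
    AssignValues(num, idx, array, args...);
}
} // namespace

int main()
{
    const unsigned int shape[] = { 5, 2, 29, 17 }; // N, C, H, W
    unsigned int width = 1, height = 1, channels = 1, batches = 1;

    AssignValues(4u, 0u, shape, width, height, channels, batches);

    // width picks up the last element, batches the first.
    assert(width == 17 && height == 29 && channels == 2 && batches == 5);
    return 0;
}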
+# SPDX-License-Identifier: MIT +# + +add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends) +list(APPEND armnnLibraries armnnBackendsCommon) + +# single place to use wildcards, so we can include +# yet unknown backend modules +FILE(GLOB backendIncludes ${PROJECT_SOURCE_DIR}/src/backends/*/backend.cmake) + +foreach(backendInclude ${backendIncludes}) + include(${backendInclude}) +endforeach() diff --git a/src/backends/test/ActivationFixture.hpp b/src/backends/test/ActivationFixture.hpp new file mode 100644 index 0000000000..d9d4ca7470 --- /dev/null +++ b/src/backends/test/ActivationFixture.hpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "TensorCopyUtils.hpp" +#include "WorkloadTestUtils.hpp" + +struct ActivationFixture +{ + ActivationFixture() + { + auto boostArrayExtents = boost::extents + [boost::numeric_cast(batchSize)] + [boost::numeric_cast(channels)] + [boost::numeric_cast(height)] + [boost::numeric_cast(width)]; + output.resize(boostArrayExtents); + outputExpected.resize(boostArrayExtents); + input.resize(boostArrayExtents); + + unsigned int inputShape[] = { batchSize, channels, height, width }; + unsigned int outputShape[] = { batchSize, channels, height, width }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + input = MakeRandomTensor(inputTensorInfo, 21453); + } + + unsigned int width = 17; + unsigned int height = 29; + unsigned int channels = 2; + unsigned int batchSize = 5; + + boost::multi_array output; + boost::multi_array outputExpected; + boost::multi_array input; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + // Parameters used by some of the activation functions. + float a = 0.234f; + float b = -12.345f; +}; + + +struct PositiveActivationFixture : public ActivationFixture +{ + PositiveActivationFixture() + { + input = MakeRandomTensor(inputTensorInfo, 2342423, 0.0f, 1.0f); + } +}; \ No newline at end of file diff --git a/src/backends/test/ActivationTestImpl.hpp b/src/backends/test/ActivationTestImpl.hpp new file mode 100644 index 0000000000..a5d327c287 --- /dev/null +++ b/src/backends/test/ActivationTestImpl.hpp @@ -0,0 +1,560 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
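ActivationFixture above gives every fixture-based test the same randomly filled 5x2x29x17 input tensor and matching output buffers. The Boost.Test mechanism it relies on is sketched below with a deliberately minimal, self-contained fixture (not armnn code): the framework constructs the fixture before the test body runs, so its members are directly visible inside the case.

#include <boost/test/unit_test.hpp>

struct MinimalFixture
{
    MinimalFixture() : value(42) {}   // runs before the test body
    int value;
};

BOOST_FIXTURE_TEST_CASE(UsesFixtureMembers, MinimalFixture)
{
    BOOST_TEST(value == 42);          // fixture members are in scope here
}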
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" +#include "ActivationFixture.hpp" + +#include + +template +LayerTestResult BoundedReLuTestCommon(armnn::IWorkloadFactory& workloadFactory, + float upperBound, float lowerBound, + float inputScale, int32_t inputOffset, float outputScale, int32_t outputOffset, + const std::vector& inputData, const std::vector& outputExpectedData, + unsigned int inputWidth, unsigned int inputHeight, + unsigned int inputChannels, unsigned int inputBatchSize) +{ + unsigned int outputWidth = inputWidth; + unsigned int outputHeight = inputHeight; + unsigned int outputChannels = inputChannels; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType()); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType()); + + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(inputScale); + inputTensorInfo.SetQuantizationOffset(inputOffset); + + outputTensorInfo.SetQuantizationScale(outputScale); + outputTensorInfo.SetQuantizationOffset(outputOffset); + } + + LayerTestResult result(inputTensorInfo); + + auto input = MakeTensor(inputTensorInfo, inputData); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu. + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + descriptor.m_Parameters.m_Function = armnn::ActivationFunction::BoundedReLu; + descriptor.m_Parameters.m_A = upperBound; + descriptor.m_Parameters.m_B = lowerBound; + + std::unique_ptr workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + result.outputExpected = MakeTensor(outputTensorInfo, outputExpectedData); + + return result; +} + +LayerTestResult BoundedReLuUpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 4u; + unsigned int inputHeight = 5u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector input = std::vector{ + -2.0f, 0.1f, 0.5f, 1.25f, + 0.786f, 0.9875f, -1.5f, 0.384f, + 1.0001f, 3.5f, 7.5f, 0.896f, + 2.126f, 2.0f, 0.3f, 0.15f, + 0.999f, 1.2f, 0.89f, 6.1f, + }; + + // Calculated manually. 
+ std::vector output = std::vector{ + -1.0f, 0.1f, 0.5f, 1.0f, + 0.786f, 0.9875f, -1.0f, 0.384f, + 1.0f, 1.0f, 1.0f, 0.896f, + 1.0f, 1.0f, 0.3f, 0.15f, + 0.999f, 1.0f, 0.89f, 1.0f, + }; + + return BoundedReLuTestCommon(workloadFactory, 1.0f, -1.0f, 1.0f, 0, 1.0f, 0, input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +LayerTestResult BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 4u; + unsigned int inputHeight = 5u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector input = std::vector{ + -1.0f, 0.1f, 0.5f, 6.25f, + 0.786f, 5.9875f, -0.5f, 0.384f, + 6.0001f, 3.5f, 7.5f, 0.896f, + 2.126f, 12.0f, 0.3f, 0.15f, + 0.999f, 1.2f, 0.89f, 6.1f, + }; + + // Calculated manually. + std::vector output = std::vector{ + 0.0f, 0.1f, 0.5f, 6.0f, + 0.786f, 5.9875f, 0.0f, 0.384f, + 6.0f, 3.5f, 6.0f, 0.896f, + 2.126f, 6.0f, 0.3f, 0.15f, + 0.999f, 1.2f, 0.89f, 6.0f, + }; + + return BoundedReLuTestCommon(workloadFactory, 6.0f, 0.0f, 1.0f, 0, 1.0f, 0, input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +LayerTestResult BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 3u; + unsigned int inputHeight = 2u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector input = std::vector{ + 51, 124, 28, + 251, 8, 92 + }; + + // Calculated manually. + std::vector output = std::vector{ + 0, 122, 0, + 255, 0, 58 + }; + + float inputScale = 12.0f / 255.0f; + int32_t inputOffset = 63; + float outputScale = 6.0f / 255.0f; + int32_t outputOffset = 0; + + return BoundedReLuTestCommon(workloadFactory, 6.0f, 0.0f, + inputScale, inputOffset, outputScale, outputOffset, + input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +LayerTestResult BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int inputWidth = 3u; + unsigned int inputHeight = 2u; + unsigned int inputChannels = 1u; + unsigned int inputBatchSize = 1; + + std::vector input = std::vector{ + 51, 230, 28, + 251, 8, 92 + }; + + // Calculated manually. + std::vector output = std::vector{ + 51, 192, 32, + 192, 32, 92 + }; + + int32_t inputOffset = 112; + float inputScale = 0.0125f; + + return BoundedReLuTestCommon(workloadFactory, 1.0f, -1.0f, + inputScale, inputOffset, inputScale, inputOffset, // Input/output scale & offset same. 
+ input, output, + inputWidth, inputHeight, inputChannels, inputBatchSize); +} + +namespace +{ + +struct BoundedReLuRandomInputTestTraits +{ + constexpr static unsigned int inputHeight = 31u; + constexpr static unsigned int inputWidth = 19u; + constexpr static unsigned int inputChannels = 4u; + constexpr static unsigned int inputBatchSize = 2; + + constexpr static unsigned int outputHeight = inputHeight; + constexpr static unsigned int outputWidth = inputWidth; + constexpr static unsigned int outputChannels = inputChannels; + constexpr static unsigned int outputBatchSize = inputBatchSize; + + static armnn::TensorInfo GetInputTensorInfo() + { + return armnn::TensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + } + + static armnn::TensorInfo GetOutputTensorInfo() + { + return armnn::TensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + } +}; + +boost::multi_array BoundedReLuRandomInputTest(armnn::IWorkloadFactory& workloadFactory, + float lowerBound, + float upperBound, + const armnn::ActivationDescriptor& activationDescriptor) +{ + const armnn::TensorInfo inputTensorInfo = BoundedReLuRandomInputTestTraits::GetInputTensorInfo(); + const armnn::TensorInfo outputTensorInfo = BoundedReLuRandomInputTestTraits::GetOutputTensorInfo(); + + boost::multi_array output(GetTensorShapeAsArray<4>(outputTensorInfo)); + + // Min/max random values passed to MakeRandomTensor are purposely outside of the ReLu + // range [lowerBound, upperBound]. + auto input = MakeRandomTensor(inputTensorInfo, 4605828, lowerBound - 5.0f, upperBound * 2.0f); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Set up bounded ReLu. 
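+    // (Aside on the uint8 "calculated manually" values further up, in
+    // BoundedReLuUint8UpperBoundOnlyTest: dequantise with inputScale = 12/255 and
+    // inputOffset = 63, clamp to [0, 6], then requantise with outputScale = 6/255 and
+    // outputOffset = 0. For example 124 -> (124 - 63) * 12/255 = 2.87 -> 2.87 / (6/255) = 122,
+    // and 251 -> 8.85 -> clamped to 6 -> 255, matching the expected outputs.)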
+ armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + descriptor.m_Parameters = activationDescriptor; + + std::unique_ptr workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&output[0][0][0][0], outputHandle.get()); + + return output; +} + +} // namespace + +LayerTestResult CompareBoundedReLuTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& otherWorkloadFactory, + float upperBound, + float lowerBound) +{ + LayerTestResult result(BoundedReLuRandomInputTestTraits::GetOutputTensorInfo()); + + armnn::ActivationDescriptor activationDescriptor; + activationDescriptor.m_Function = armnn::ActivationFunction::BoundedReLu; + activationDescriptor.m_A = upperBound; + activationDescriptor.m_B = lowerBound; + + result.output = BoundedReLuRandomInputTest(workloadFactory, 0.0f, upperBound, activationDescriptor); + result.outputExpected = BoundedReLuRandomInputTest(otherWorkloadFactory, 0.0f, upperBound, activationDescriptor); + + return result; +} + +template +LayerTestResult ConstantLinearActivationTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int inputHeight = 20; + unsigned int inputWidth = 17; + unsigned int inputChannels = 3; + unsigned int batchSize = 5; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, inputChannels, inputHeight, inputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType()); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult ret(outputTensorInfo); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Do linear activation that should leave the tensor unchanged. + armnn::ActivationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_A = 1.0f; + data.m_Parameters.m_B = 0.0f; + data.m_Parameters.m_Function = armnn::ActivationFunction::Linear; + + std::unique_ptr workload = workloadFactory.CreateActivation(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + boost::multi_array input = MakeRandomTensor(inputTensorInfo, 7123561); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + // Ensure output equals input. 
+ ret.outputExpected = input; + + return ret; +} + +LayerTestResult ConstantLinearActivationTest(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantLinearActivationTestCommon(workloadFactory); +} + +LayerTestResult ConstantLinearActivationUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantLinearActivationTestCommon(workloadFactory, 4.0f, 3); +} + +template +LayerTestResult SimpleActivationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::ActivationFunction activationFunction, + float activationParameterA, + float activationParameterB, + float qScale, + int32_t qOffset, + const std::vector& inputData, + const std::vector& outputExpectedData) +{ + constexpr static unsigned int inputWidth = 16u; + constexpr static unsigned int inputHeight = 1u; + constexpr static unsigned int inputChannels = 1u; + constexpr static unsigned int inputBatchSize = 1u; + + constexpr static unsigned int outputWidth = inputWidth; + constexpr static unsigned int outputHeight = inputHeight; + constexpr static unsigned int outputChannels = inputChannels; + constexpr static unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult result(inputTensorInfo); + + auto input = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, inputData)); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + // Setup bounded ReLu. + armnn::ActivationQueueDescriptor descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(descriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + descriptor.m_Parameters.m_Function = activationFunction; + descriptor.m_Parameters.m_A = activationParameterA; + descriptor.m_Parameters.m_B = activationParameterB; + + std::unique_ptr workload = workloadFactory.CreateActivation(descriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + // Calculated manually. + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, outputExpectedData)); + + return result; +} + +template +LayerTestResult SimpleSigmoidTestCommon(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset) +{ + std::vector inputData = { + -0.1f, -0.2f, -0.3f, -0.4f, + 0.1f, 0.2f, 0.3f, 0.4f, + -1.0f, -2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, 3.0f, 4.0f + }; + + // Calculate output values for input. 
+ auto f = [](float value) + { + return 1.0f / (1.0f + std::exp(-value)); + }; + std::vector outputExpectedData(inputData.size()); + std::transform(inputData.begin(), inputData.end(), outputExpectedData.begin(), f); + + return SimpleActivationTest(workloadFactory, + armnn::ActivationFunction::Sigmoid, + 0.f, + 0.f, + qScale, + qOffset, + inputData, + outputExpectedData); +} + +LayerTestResult SimpleSigmoidTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleSigmoidTestCommon(workloadFactory, 0.0f, 0); +} + +LayerTestResult SimpleSigmoidUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleSigmoidTestCommon(workloadFactory, 0.1f, 50); +} + +template +LayerTestResult CompareActivationTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f, + unsigned int batchSize = 5, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int width = 17; + unsigned int height = 29; + unsigned int channels = 2; + + float a = 0.234f; + float b = -12.345f; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, channels, height, width}; + + inputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType()); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + float minVal = -10.f; + if (f == armnn::ActivationFunction::Sqrt) + { + minVal = 0.f; + } + + boost::multi_array input = MakeRandomTensor(inputTensorInfo, 21453, minVal, 10.f); + + + LayerTestResult ret(outputTensorInfo); + auto boostArrayExtents = boost::extents + [boost::numeric_cast(batchSize)] + [boost::numeric_cast(channels)] + [boost::numeric_cast(height)] + [boost::numeric_cast(width)]; + ret.output.resize(boostArrayExtents); + ret.outputExpected.resize(boostArrayExtents); + + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ActivationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_A = a; + data.m_Parameters.m_B = b; + data.m_Parameters.m_Function = f; + + armnn::ActivationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr workload = workloadFactory.CreateActivation(data, info); + BOOST_ASSERT(workload != nullptr); + std::unique_ptr workloadRef = refWorkloadFactory.CreateActivation(refData, refInfo); + BOOST_ASSERT(workloadRef != nullptr); + + inputHandle->Allocate(); + outputHandle->Allocate(); + inputHandleRef->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), 
&input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +LayerTestResult CompareActivationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f, + unsigned int batchSize) +{ + return CompareActivationTestImpl(workloadFactory, refWorkloadFactory, f, batchSize); +} + +LayerTestResult CompareActivationUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f) +{ + return CompareActivationTestImpl(workloadFactory, refWorkloadFactory, f, 5, 0.1f, 50); +} diff --git a/src/backends/test/ArmComputeCl.cpp b/src/backends/test/ArmComputeCl.cpp new file mode 100644 index 0000000000..9a516b6d60 --- /dev/null +++ b/src/backends/test/ArmComputeCl.cpp @@ -0,0 +1,311 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include +#include "test/TensorHelpers.hpp" +#include "LayerTests.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClWorkloadFactory.hpp" +#include "backends/ClWorkloads/ClWorkloadUtils.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/ClLayerSupport.hpp" +#include "ActivationFixture.hpp" +#include "ClContextControlFixture.hpp" + +#include +#include +#include +#include + +#include "test/UnitTests.hpp" + +BOOST_FIXTURE_TEST_SUITE(Compute_ArmComputeCl, ClContextControlFixture) +using FactoryType = armnn::ClWorkloadFactory; + +// ============================================================================ +// UNIT tests + +// Activation +ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest) + +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f) +ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f) + +ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest) +ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest) + +// Fully Connected +ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) +ARMNN_AUTO_TEST_CASE(FullyConnectedUint8, FullyConnectedUint8Test, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedBiasedUint8, FullyConnectedUint8Test, true) + +ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) + +// Convolution +ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) + +// Depthwise Convolution 
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) + +// Softmax +BOOST_AUTO_TEST_CASE(Softmax4dSupport) +{ + const unsigned int numDimensions = 4u; + std::array dimensionSizes; + dimensionSizes.fill(1u); + + const armnn::TensorInfo inputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); + const armnn::TensorInfo outputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32); + + // 4D Softmax should be reported as unsupported on the CL backend + BOOST_TEST(!armnn::IsSoftmaxSupportedCl(inputInfo, outputInfo, armnn::SoftmaxDescriptor())); +} + +// Splitter +ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest) +ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test) + +ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) +ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) + +// Merger +ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) +ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) + +// Pooling +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8, + IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2, + IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, + false) 
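The Softmax4dSupport case above shows that layer support can be queried directly, without building a workload. A minimal sketch of the same query for a shape the CL backend is expected to handle (assuming ClLayerSupport.hpp is on the include path, as it is in this file):

bool ClSupports2dSoftmax()
{
    const armnn::TensorShape shape({ 1, 10 });
    const armnn::TensorInfo  inputInfo(shape, armnn::DataType::Float32);
    const armnn::TensorInfo  outputInfo(shape, armnn::DataType::Float32);

    // Same call as in Softmax4dSupport, but with a 2D tensor.
    return armnn::IsSoftmaxSupportedCl(inputInfo, outputInfo, armnn::SoftmaxDescriptor());
}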
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding, + IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, + true) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride1, L2Pooling2dSize3Stride1Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride1Uint8, L2Pooling2dSize3Stride1Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride3, L2Pooling2dSize3Stride3Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride3Uint8, L2Pooling2dSize3Stride3Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride4, L2Pooling2dSize3Stride4Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride4Uint8, L2Pooling2dSize3Stride4Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test) +ARMNN_AUTO_TEST_CASE(L2Pooling2dSize9, L2Pooling2dSize9Test) +ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Test) + +// Add +ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest) + +ARMNN_AUTO_TEST_CASE(AdditionUint8, AdditionUint8Test) +ARMNN_AUTO_TEST_CASE(AddBroadcastUint8, AdditionBroadcastUint8Test) +ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Test) + +// Sub +ARMNN_AUTO_TEST_CASE(SimpleSub, SubtractionTest) + +// Div +ARMNN_AUTO_TEST_CASE(SimpleDivision, DivisionTest) +ARMNN_AUTO_TEST_CASE(DivisionByZero, DivisionByZeroTest) +ARMNN_AUTO_TEST_CASE(DivisionBroadcast1Element, DivisionBroadcast1ElementTest) +ARMNN_AUTO_TEST_CASE(DivisionBroadcast1DVector, DivisionBroadcast1DVectorTest) +// NOTE: quantized division is not supported by CL and not required by the +// android NN api + +// Mul +ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) +ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) +ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest) + +// Batch Norm +ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) + +ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest) +ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest) + +// Resize Bilinear +ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, SimpleResizeBilinearTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearNop, ResizeBilinearNopTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMin, ResizeBilinearSqMinTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMin, ResizeBilinearMinTest) +ARMNN_AUTO_TEST_CASE(ResizeBilinearMag, ResizeBilinearMagTest) + +// Constant +ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) +ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantTestUint8) + +// Concat +ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) +ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) 
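The AddBroadcast1Element, MultiplicationBroadcast1DVector and DivisionBroadcast1Element cases above exercise broadcasting, where a one-element (or one-row) second input is applied across every element of the first. A plain-C++ illustration of the one-element case (not armnn code):

#include <vector>

std::vector<float> AddBroadcastOneElement(const std::vector<float>& input0, float input1)
{
    std::vector<float> output;
    output.reserve(input0.size());
    for (float value : input0)
    {
        output.push_back(value + input1); // the single element is reused for every position
    }
    return output;
}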
+ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) + +// Floor +ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) + +// Reshape +ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) +ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) + +// Permute +ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) +ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) + +// Lstm +ARMNN_AUTO_TEST_CASE(LstmLayerFloat32WithCifgWithPeepholeNoProjection, + LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest) +ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgNoPeepholeNoProjection, + LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest) +ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjection, + LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest) + +// Convert from Float16 to Float32 +ARMNN_AUTO_TEST_CASE(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Test) +// Convert from Float32 to Float16 +ARMNN_AUTO_TEST_CASE(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test) + +// ============================================================================ +// COMPARE tests + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareConv2dWithReference, CompareConvolution2dTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32, CompareDepthwiseConvolution2dTest) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8, CompareDepthwiseConvolution2dTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationWithinWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Within, + armnn::NormalizationAlgorithmMethod::LocalBrightness) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationAcrossWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Across, + armnn::NormalizationAlgorithmMethod::LocalBrightness) + 
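These COMPARE cases reuse the pattern shown in CompareActivationTestImpl: the same parameters are used to create one workload on the backend under test and one on the reference backend, both consume the same input, and the two outputs become result.output and result.outputExpected. A condensed sketch of that flow (handle setup and data copies omitted for brevity):

void RunActivationOnBothBackends(armnn::IWorkloadFactory& testFactory,
                                 armnn::IWorkloadFactory& refFactory,
                                 const armnn::ActivationQueueDescriptor& descriptor,
                                 const armnn::WorkloadInfo& info,
                                 const armnn::ActivationQueueDescriptor& refDescriptor,
                                 const armnn::WorkloadInfo& refInfo)
{
    auto workload    = testFactory.CreateActivation(descriptor, info);
    auto refWorkload = refFactory.CreateActivation(refDescriptor, refInfo);

    workload->Execute();
    refWorkload->Execute();

    // The caller then copies both outputs back with CopyDataFromITensorHandle and
    // compares them element-wise.
}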
+ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta2WithReference, CompareSoftmaxTest, 2.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8, CompareSoftmaxUint8Test, 1.0f) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::Max) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::Average) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithRefUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::Average) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareL2Pooling2dWithRef, ComparePooling2dTest, armnn::PoolingAlgorithm::L2) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAddition, CompareAdditionTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMultiplicationWithRef, CompareMultiplicationTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareBatchNorm, CompareBatchNormTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu1, CompareBoundedReLuTest, 1.0f, -1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareReLu6, CompareBoundedReLuTest, 6.0f, 0.0f) + +// ============================================================================ +// FIXTURE tests + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSigmoidActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sigmoid, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareTanhActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::TanH, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLinearActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Linear, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::ReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::BoundedReLu, 5u) +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReferenceUint8, ActivationFixture, + CompareActivationUint8Test, armnn::ActivationFunction::BoundedReLu) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSoftReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::SoftReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLeakyReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::LeakyReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareAbsActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Abs, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, PositiveActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sqrt, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Square, 5u) + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/ArmComputeNeon.cpp b/src/backends/test/ArmComputeNeon.cpp new file mode 100644 index 0000000000..f1a2cf65bd --- /dev/null +++ b/src/backends/test/ArmComputeNeon.cpp @@ -0,0 +1,463 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include + +#include "test/TensorHelpers.hpp" +#include "LayerTests.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/NeonWorkloadFactory.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/test/TensorCopyUtils.hpp" +#include "ActivationFixture.hpp" + +#include "WorkloadTestUtils.hpp" + +#include "test/UnitTests.hpp" + +BOOST_AUTO_TEST_SUITE(Compute_ArmComputeNeon) +using FactoryType = armnn::NeonWorkloadFactory; + +// ============================================================================ +// UNIT tests + +// Convolution +ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) + +namespace +{ + +armnn::Convolution2dDescriptor MakeConv2dDesc(uint32_t strideX, uint32_t strideY, + uint32_t padLeft = 0, uint32_t padRight = 0, uint32_t padTop = 0, uint32_t padBottom = 0) +{ + armnn::Convolution2dDescriptor result; + result.m_StrideX = strideX; + result.m_StrideY = strideY; + result.m_PadLeft = padLeft; + result.m_PadRight = padRight; + result.m_PadTop = padTop; + result.m_PadBottom = padBottom; + result.m_BiasEnabled = true; + return result; +} + +} + +BOOST_AUTO_TEST_CASE(Conv2dUtils) +{ + // The only preferred Neon convolution is 1x1 with padding=0 and stride size {1,2,3}. + armnn::TensorShape shape1x1({ 1,1,1,1 }); + armnn::TensorInfo info1x1(shape1x1, armnn::DataType::Float32); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 2))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 3))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 1))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 2))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(2, 3))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 1))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 2))); + BOOST_TEST(armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 3))); + + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(4, 1))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(4, 5))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(3, 6))); + + // non zero padding is not preferred for direct convolution + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 1, 0))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 0, 1))); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info1x1, MakeConv2dDesc(1, 1, 1, 1))); + + // 2x2 filter not preferred for direct convolution + armnn::TensorShape shape2x2({ 1,1,2,2 }); + armnn::TensorInfo info2x2(shape2x2, armnn::DataType::Float32); + BOOST_TEST(!armnn::IsNeonDirectConvolutionPreferred(info2x2, MakeConv2dDesc(1, 1))); +} + +// Depthwise Convolution 
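+// (Editorial sketch, not part of the original change.) The depthwise tests below rely on the
+// usual depthwise-convolution shape rule: each input channel is filtered independently and the
+// channel count is scaled by the depth multiplier,
+//
+//     outChannels = inChannels * depthMultiplier;   // "DepthMul1" => depthMultiplier == 1
+//
+// so the DepthMul1 cases keep the channel count unchanged, while DepthwiseConvolution2dTestImpl
+// in Conv2dTestImpl.hpp (added later in this patch) uses depthMultiplier == 2 and doubles it.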
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) + +namespace +{ + +armnn::DepthwiseConvolution2dDescriptor MakeDepthwiseConv2dDesc(uint32_t strideX, uint32_t strideY, + uint32_t depthMultiplier = 1, uint32_t padLeft = 0, uint32_t padRight = 0, + uint32_t padTop = 0, uint32_t padBottom = 0) +{ + boost::ignore_unused(depthMultiplier); + + armnn::DepthwiseConvolution2dDescriptor desc; + + desc.m_PadLeft = padLeft; + desc.m_PadRight = padRight; + + desc.m_PadTop = padTop; + desc.m_PadBottom = padBottom; + desc.m_StrideX = strideX; + desc.m_StrideY = strideY; + desc.m_BiasEnabled = false; + + return desc; +} + +armnn::TensorInfo CreateOutputTensorInfo(const armnn::TensorInfo& inputInfo, + const armnn::TensorInfo& weightsInfo, + const armnn::DepthwiseConvolution2dDescriptor& descriptor, + armnn::DataType dataType) +{ + const armnn::TensorShape& inputShape = inputInfo.GetShape(); + const armnn::TensorShape& filterShape = weightsInfo.GetShape(); + + unsigned int inWidth = inputShape[3]; + unsigned int inHeight = inputShape[2]; + unsigned int inBatchSize = inputShape[0]; + + unsigned int filterWidth = filterShape[3]; + unsigned int readWidth = (inWidth + descriptor.m_PadLeft + descriptor.m_PadRight) - (filterWidth); + unsigned int outWidth = 1u + (readWidth / descriptor.m_StrideX); + + unsigned int filterHeight = filterShape[2]; + unsigned int readHeight = (inHeight + descriptor.m_PadTop + descriptor.m_PadBottom) - (filterHeight); + unsigned int outHeight = 1u + (readHeight / descriptor.m_StrideY); + unsigned int depthMultiplier = filterShape[0]; + + unsigned int outChannels = filterShape[1] * depthMultiplier; + unsigned int outBatchSize = inBatchSize; + + armnn::TensorShape outputShape({outBatchSize, outChannels, outHeight, outWidth}); + return armnn::TensorInfo(outputShape, dataType); +} +} + +BOOST_AUTO_TEST_CASE(DepthwiseConv2dUtils) +{ + const armnn::DataType dataType = armnn::DataType::Float32; + + armnn::TensorInfo inputInfo({1, 1, 10, 10 }, dataType); + armnn::TensorInfo outputInfo; + armnn::TensorInfo weightsInfo3x3({ 1, 1, 3, 3 }, dataType); + armnn::TensorInfo biasesInfo; + + armnn::DepthwiseConvolution2dDescriptor descriptor; + + // Strides supported: 1,2,3 + descriptor = MakeDepthwiseConv2dDesc(1, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(1, 2); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(1, 3); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, 
biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(2, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(2, 2); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(2, 3); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(3, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(3, 2); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + descriptor = MakeDepthwiseConv2dDesc(3, 3); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + // Supported stride 4 + descriptor = MakeDepthwiseConv2dDesc(4, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); + + // Supported weights shape 1x1 + armnn::TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, armnn::DataType::Float32); + descriptor = MakeDepthwiseConv2dDesc(1, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo1x1, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo1x1, biasesInfo)); + + // Supported shape 2x2 + armnn::TensorInfo weightsInfo2x2({ 1, 1, 2, 2 }, armnn::DataType::Float32); + descriptor = MakeDepthwiseConv2dDesc(1, 1); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo2x2, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo2x2, biasesInfo)); + + // Asymmetric padding + descriptor = MakeDepthwiseConv2dDesc(1, 1, 1, 1, 2, 1, 2); + outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo3x3, descriptor, dataType); + BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, outputInfo, descriptor, + weightsInfo3x3, biasesInfo)); +} + +// Pooling +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test) + +ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest) 
+ARMNN_AUTO_TEST_CASE(UNSUPPORTED_SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride1, L2Pooling2dSize3Stride1Test)
+ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride1Uint8, L2Pooling2dSize3Stride1Uint8Test)
+ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride3, L2Pooling2dSize3Stride3Test)
+ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride3Uint8, L2Pooling2dSize3Stride3Uint8Test)
+ARMNN_AUTO_TEST_CASE(L2Pooling2dSize3Stride4, L2Pooling2dSize3Stride4Test)
+ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize3Stride4Uint8, L2Pooling2dSize3Stride4Uint8Test)
+ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test)
+ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test)
+ARMNN_AUTO_TEST_CASE(L2Pooling2dSize9, L2Pooling2dSize9Test)
+ARMNN_AUTO_TEST_CASE(UNSUPPORTED_L2Pooling2dSize9Uint8, L2Pooling2dSize9Uint8Test)
+
+// Ignore padding values for pooling but count padding fields into the divisor
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, IgnorePaddingSimpleAveragePooling2dNoPaddingTest)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8,
+ IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+ IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+ IgnorePaddingAveragePooling2dSize3x2Stride2x2Test,
+ true)
+
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest)
+ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test)
+ARMNN_AUTO_TEST_CASE(UNSUPPORTED_IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test)
+
+// Activation
+ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest)
+
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f)
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f)
+
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f)
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f)
+
+ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest)
+ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest)
+
+// Softmax
+BOOST_AUTO_TEST_CASE(Softmax4dSupport)
+{
+ const unsigned int numDimensions = 4u;
+ std::array<unsigned int, numDimensions> dimensionSizes;
+ dimensionSizes.fill(1u);
+
+ const armnn::TensorInfo inputInfo(numDimensions, &dimensionSizes.front(), armnn::DataType::Float32);
+ const armnn::TensorInfo outputInfo(numDimensions,
&dimensionSizes.front(), armnn::DataType::Float32); + + // 4D Softmax should be reported as unsupported on the NEON backend + BOOST_TEST(!armnn::IsSoftmaxSupportedNeon(inputInfo, outputInfo, armnn::SoftmaxDescriptor())); +} + +// Splitter +ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest) +ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test) + +ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest) +ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test) + +// Merger +ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest) +ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test) + +// Fully Connected +ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false) +ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true) +ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false) +ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true) + +// Add +ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest) +ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest) + +// Sub +ARMNN_AUTO_TEST_CASE(SimpleSub, SubtractionTest) + +// Mul +ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest) +ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest) +ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest) + +// Batch Norm +ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) + +// Constant +ARMNN_AUTO_TEST_CASE(Constant, ConstantTest) +ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantTestUint8) + +// Concatenation +ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) +ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) 
+ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) + +// L2 Normalization +ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest); +ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest); +ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest); +ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest); + +// Floor +ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) + +// Reshape +ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) +ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) + +// Permute +ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) +ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) + +// ============================================================================ +// COMPARE tests + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareConv2dWithReference, CompareConvolution2dTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceFloat32, CompareDepthwiseConvolution2dTest) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareDepthwiseConv2dWithReferenceUint8, CompareDepthwiseConvolution2dTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationWithinWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Within, + armnn::NormalizationAlgorithmMethod::LocalBrightness) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareNormalizationAcrossWithReference, CompareNormalizationTest, + armnn::NormalizationAlgorithmChannel::Across, + armnn::NormalizationAlgorithmMethod::LocalBrightness) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithReference, ComparePooling2dTest, armnn::PoolingAlgorithm::Max) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMaxPooling2dWithReferenceUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::Max) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithReference, ComparePooling2dTest, + armnn::PoolingAlgorithm::Average) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAveragePooling2dWithReferenceUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::Average) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareL2Pooling2dWithReference, ComparePooling2dTest, armnn::PoolingAlgorithm::L2) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(UNSUPPORTED_CompareL2Pooling2dWithReferenceUint8, ComparePooling2dUint8Test, + armnn::PoolingAlgorithm::L2) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta1WithReference, CompareSoftmaxTest, 1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxBeta2WithReference, CompareSoftmaxTest, 2.0f) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8Beta1WithReference, CompareSoftmaxUint8Test, 1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareSoftmaxUint8Beta2WithReference, CompareSoftmaxUint8Test, 2.0f) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareAddition, CompareAdditionTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareMultiplicationWithReference, CompareMultiplicationTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(CompareBatchNorm, CompareBatchNormTest) + +ARMNN_COMPARE_REF_AUTO_TEST_CASE(ReLu1, CompareBoundedReLuTest, 1.0f, -1.0f) +ARMNN_COMPARE_REF_AUTO_TEST_CASE(ReLu6, CompareBoundedReLuTest, 6.0f, 0.0f) + +// ============================================================================ +// FIXTURE tests + 
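+// (Editorial sketch, assuming the usual definitions of these activation functions; the
+// authoritative reference implementations live in the reference backend, not in this comment.)
+// Per element x with parameters a and b, the fixture comparisons below cover:
+//
+//     Sigmoid:     1 / (1 + exp(-x))          ReLu:        max(0, x)
+//     TanH:        a * tanh(b * x)            BoundedReLu: min(a, max(b, x))
+//     Linear:      a * x + b                  SoftReLu:    log(1 + exp(x))
+//     LeakyReLu:   x > 0 ? x : a * x          Abs/Sqrt/Square: |x|, sqrt(x), x * x
+//
+// Each case runs the NEON workload and the reference workload and compares the outputs.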
+ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSigmoidActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sigmoid, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareTanhActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::TanH, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLinearActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Linear, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::ReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::BoundedReLu, 5u) +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareBoundedReLuActivationWithReferenceUint8, ActivationFixture, + CompareActivationUint8Test, armnn::ActivationFunction::BoundedReLu) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSoftReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::SoftReLu, 1u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareLeakyReLuActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::LeakyReLu, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareAbsActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Abs, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSqrtActivationWithReference, PositiveActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Sqrt, 5u) + +ARMNN_COMPARE_REF_FIXTURE_TEST_CASE(CompareSquareActivationWithReference, ActivationFixture, + CompareActivationTest, armnn::ActivationFunction::Square, 5u) +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/BatchNormTestImpl.hpp b/src/backends/test/BatchNormTestImpl.hpp new file mode 100644 index 0000000000..7126db9074 --- /dev/null +++ b/src/backends/test/BatchNormTestImpl.hpp @@ -0,0 +1,112 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include + +#include "test/TensorHelpers.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include "backends/test/QuantizeHelper.hpp" + + +template +LayerTestResult BatchNormTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + const unsigned int width = 2; + const unsigned int height = 3; + const unsigned int channels = 2; + const unsigned int num = 1; + + armnn::TensorInfo inputTensorInfo({num, channels, height, width}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({num, channels, height, width}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo({channels}, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + tensorInfo.SetQuantizationScale(qScale); + tensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, + { + 1.f, 4.f, + 4.f, 2.f, + 1.f, 6.f, + + 1.f, 1.f, + 4.f, 1.f, + -2.f, 4.f + })); + // These values are per-channel of the input. 
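+ // (Illustrative arithmetic, added for clarity.) With m_Eps set to 0 below, the expected output
+ // is gamma * (x - mean) / sqrt(variance) + beta, applied per channel. For example, in the first
+ // channel (mean 3, variance 4, beta 3, gamma 2) the input value 1.f maps to
+ // 2 * (1 - 3) / 2 + 3 = 1.f, and in the second channel (mean -2, variance 9, beta 2, gamma 1)
+ // the input value 4.f maps to (4 + 2) / 3 + 2 = 4.f.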
+ auto mean = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, -2})); + auto variance = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {4, 9})); + auto beta = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {3, 2})); + auto gamma = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, {2, 1})); + LayerTestResult ret(outputTensorInfo); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::BatchNormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle meanTensor(tensorInfo); + armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo); + armnn::ScopedCpuTensorHandle betaTensor(tensorInfo); + armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo); + + AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); + AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); + AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); + AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Mean = &meanTensor; + data.m_Variance = &varianceTensor; + data.m_Beta = &betaTensor; + data.m_Gamma = &gammaTensor; + data.m_Parameters.m_Eps = 0.0f; + + // For each channel: + // substract mean, divide by standard deviation (with an epsilon to avoid div by 0), + // multiply by gamma and add beta + ret.outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, + { + 1.f, 4.f, + 4.f, 2.f, + 1.f, 6.f, + + 3.f, 3.f, + 4.f, 3.f, + 2.f, 4.f + })); + + std::unique_ptr workload = workloadFactory.CreateBatchNormalization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} \ No newline at end of file diff --git a/src/backends/test/ClContextControlFixture.hpp b/src/backends/test/ClContextControlFixture.hpp new file mode 100644 index 0000000000..54c5a4f505 --- /dev/null +++ b/src/backends/test/ClContextControlFixture.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/ClContextControl.hpp" + +template +struct ClContextControlFixtureBase +{ + // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case + ClContextControlFixtureBase() : m_ClContextControl(nullptr, ProfilingEnabled) {} + ~ClContextControlFixtureBase() {} + + armnn::ClContextControl m_ClContextControl; +}; + +using ClContextControlFixture = ClContextControlFixtureBase; +using ClProfilingContextControlFixture = ClContextControlFixtureBase; diff --git a/src/backends/test/Conv2dTestImpl.hpp b/src/backends/test/Conv2dTestImpl.hpp new file mode 100644 index 0000000000..eb7165bf09 --- /dev/null +++ b/src/backends/test/Conv2dTestImpl.hpp @@ -0,0 +1,921 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +// Mapping from input type to bias type for fully connected layers. 
+// float => float, uint8_t => int32_t +template +struct FullyConnectedBiasTypeForInputType; + +template<> +struct FullyConnectedBiasTypeForInputType +{ + using Type = float; +}; + +template<> +struct FullyConnectedBiasTypeForInputType +{ + using Type = int32_t; +}; + +// Modifies a std::vector in-place using a specified bias. +template +void ApplyBias(std::vector& v, float vScale, int32_t vOffset, + const std::vector& bias, float bScale, int32_t bOffset, uint32_t w, uint32_t h) +{ + BOOST_ASSERT_MSG((armnn::IsQuantizedType() && vScale != 0.0f) || (!armnn::IsQuantizedType()), + "Invalid type and parameter combination."); + BOOST_ASSERT_MSG((armnn::IsQuantizedType() && bScale != 0.0f) || (!armnn::IsQuantizedType()), + "Invalid type and parameter combination."); + + // Note we need to dequantize and re-quantize the image value and the bias. + for (uint32_t i = 0; i < bias.size(); ++i) + { + float dBias = SelectiveDequantize(bias[i], bScale, bOffset); + for (uint32_t y = 0; y < h; ++y) + { + for (uint32_t x = 0; x < w; ++x) + { + uint32_t offset = (i * h + y) * w + x; + BOOST_ASSERT(offset < v.size()); + T& outRef = v[offset]; + float dOutput = SelectiveDequantize(outRef, vScale, vOffset); + outRef = SelectiveQuantize(dOutput + dBias, vScale, vOffset); + } + } + } +} + +template +LayerTestResult SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array& input, + const boost::multi_array& kernel, + const boost::multi_array& bias, + const boost::multi_array& outputExpected, + float qScale, + int32_t qOffset, + uint32_t padLeft = 0, + uint32_t padTop = 0, + uint32_t padRight = 0, + uint32_t padBottom = 0) +{ + unsigned int inputHeight = boost::numeric_cast(input.shape()[2]); + unsigned int inputWidth = boost::numeric_cast(input.shape()[3]); + unsigned int inputChannels = boost::numeric_cast(input.shape()[1]); + unsigned int inputNum = boost::numeric_cast(input.shape()[0]); + + unsigned int outputHeight = boost::numeric_cast(outputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast(outputExpected.shape()[3]); + unsigned int outputChannels = boost::numeric_cast(outputExpected.shape()[1]); + unsigned int outputNum = boost::numeric_cast(outputExpected.shape()[0]); + + unsigned int kernelHeight = boost::numeric_cast(kernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast(kernel.shape()[3]); + unsigned int kernelChannels = boost::numeric_cast(kernel.shape()[1]); + unsigned int kernelDepthMul = boost::numeric_cast(kernel.shape()[0]); + + bool biasEnabled = bias.size() > 0; + + // This function currently assumes 1 batch of input/output (and duplicates this into 2 batches). + BOOST_ASSERT(inputNum == 1); + BOOST_ASSERT(outputNum == 1); + + // If a bias is used, its size must equal the number of output channels. + BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); + + + // Note these tensors will use two (identical) batches. + armnn::TensorInfo inputTensorInfo({2*inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({2*outputNum, outputChannels, outputHeight, outputWidth}, + armnn::GetDataType()); + armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType()); + armnn::TensorInfo biasDesc({static_cast(bias.size())}, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. 
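+ // (Illustrative note, assuming the usual quantization convention.) For the uint8 path the bias
+ // is held as int32 and its scale is chosen as
+ //
+ //     bias_scale = input_scale * weight_scale   // both are qScale here, hence qScale * qScale
+ //
+ // with a zero offset, so an int32 bias value can be added straight into the int32 accumulator
+ // of the convolution without rescaling.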
+ if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + LayerTestResult ret(outputTensorInfo); + + // Construct input data - two batches of the same input image. + std::vector inputImage; + inputImage.assign(input.data(), input.data() + 1*inputChannels*inputHeight*inputWidth); + std::vector inputData; + inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); + inputData.insert(inputData.end(), inputImage.begin(), inputImage.end()); + auto batchedInput = MakeTensor(inputTensorInfo, inputData); + + std::vector outputImage; + outputImage.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); + + // Apply bias to output image if it is enabled. + if(biasEnabled) + { + std::vector biasV; + biasV.assign(bias.data(), bias.data() + outputChannels); + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + // Construct expected output data - two identical images. + std::vector outputData; + outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); + outputData.insert(outputData.end(), outputImage.begin(), outputImage.end()); + + ret.outputExpected = MakeTensor(outputTensorInfo, outputData); + + // Todo: nontrivial padding and strides. + uint32_t strideX = 1; + uint32_t strideY = 1; + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + + if(biasEnabled) + { + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + } + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - can be a source of bugs. 
+ data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padLeft; + data.m_Parameters.m_PadRight = padRight; + data.m_Parameters.m_PadTop = padTop; + data.m_Parameters.m_PadBottom = padBottom; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr workload = workloadFactory.CreateConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template +LayerTestResult DepthwiseConvolution2dAsymmetricTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array& input, + const boost::multi_array& kernel, + const boost::multi_array& bias, + const boost::multi_array& outputExpected, + float qScale, + int32_t qOffset, + uint32_t padLeft = 0, + uint32_t padTop = 0, + uint32_t padRight = 0, + uint32_t padBottom = 0, + uint32_t strideX = 1, + uint32_t strideY = 1) +{ + unsigned int inputNum = boost::numeric_cast(input.shape()[0]); + unsigned int inputChannels = boost::numeric_cast(input.shape()[1]); + unsigned int inputHeight = boost::numeric_cast(input.shape()[2]); + unsigned int inputWidth = boost::numeric_cast(input.shape()[3]); + unsigned int kernelChanMul = boost::numeric_cast(kernel.shape()[0]); + unsigned int kernelChannels = boost::numeric_cast(kernel.shape()[1]); + unsigned int kernelHeight = boost::numeric_cast(kernel.shape()[2]); + unsigned int kernelWidth = boost::numeric_cast(kernel.shape()[3]); + unsigned int outputNum = boost::numeric_cast(outputExpected.shape()[0]); + unsigned int outputChannels = boost::numeric_cast(outputExpected.shape()[1]); + unsigned int outputHeight = boost::numeric_cast(outputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast(outputExpected.shape()[3]); + + // If a bias is used, its size must equal the number of output channels. + bool biasEnabled = bias.size() > 0; + BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels); + + // Creates the tensors. + armnn::TensorInfo inputTensorInfo({inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({outputNum, outputChannels, outputHeight, outputWidth}, + armnn::GetDataType()); + armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType()); + armnn::TensorInfo biasDesc({static_cast(bias.size())}, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if (armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + // Construct the input data. + std::vector inputData; + inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth); + auto batchedInput = MakeTensor(inputTensorInfo, inputData); + + // Construct the output data, with bias applied, as appropriate. 
+ std::vector outputData; + outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth); + if (biasEnabled) + { + std::vector biasV; + biasV.assign(bias.data(), bias.data() + outputChannels); + ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, outputData); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + if (biasEnabled) + { + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + } + + armnn::DepthwiseConvolution2dQueueDescriptor data; + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs. + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padLeft; + data.m_Parameters.m_PadRight = padRight; + data.m_Parameters.m_PadTop = padTop; + data.m_Parameters.m_PadBottom = padBottom; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template +LayerTestResult DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + unsigned int inputHeight = 3; + unsigned int inputWidth = 3; + unsigned int inputChannels = 2; + unsigned int inputNum = 1; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + unsigned int kernelChannels = inputChannels; + + unsigned int outputHeight = 1; + unsigned int outputWidth = 1; + unsigned int outputChannels = kernelChannels; + unsigned int outputNum = inputNum; + + armnn::TensorInfo inputTensorInfo({ inputNum, inputChannels, inputHeight, inputWidth }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ outputNum, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType()); + armnn::TensorInfo kernelDesc({ 1, outputChannels, kernelHeight, kernelWidth }, armnn::GetDataType()); + armnn::TensorInfo biasDesc({ outputChannels }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + auto input = MakeTensor(inputTensorInfo, std::vector( + QuantizedVector(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 1.f, 2.f, 1.f, + 2.f, 1.f, 2.f, + 1.f, 2.f, 1.f, + + 1.f, 2.f, 1.f, + 2.f, 1.f, 2.f, + 1.f, 2.f, 1.f, + }))); + + std::vector biasV(QuantizedVector(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + {0, 2})); + auto bias = MakeTensor(biasDesc, biasV); + + auto kernel = MakeTensor(kernelDesc, std::vector( + QuantizedVector(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { + 1.f, 0.f, 1.f, + 0.f, 0.f, 0.f, + -1.f, 0.f, -1.f, + + 1.f, 0.f, 1.f, + 0.f, 0.f, 0.f, + -1.f, 0.f, -1.f, + }))); + + // Manually calculated. + std::vector outputImage( + QuantizedVector(outputTensorInfo.GetQuantizationScale(), + outputTensorInfo.GetQuantizationOffset(), + {0.f, 0.f}) + ); + + // Optionally apply bias to output image. + if(biasEnabled) + { + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, outputImage); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled. 
+ data.m_Parameters.m_StrideX = 1; + data.m_Parameters.m_StrideY = 1; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = 0; + data.m_Parameters.m_PadBottom = 0; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template +LayerTestResult DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + unsigned int depthMultiplier = 2; + + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 2; + unsigned int inputBatchSize = 1; + + unsigned int kernelHeight = 5; + unsigned int kernelWidth = 3; + + unsigned int outputHeight = inputHeight - kernelHeight + 1 + 2; + unsigned int outputWidth = (inputWidth - kernelWidth + 1)/2; + unsigned int outputChannels = inputChannels * depthMultiplier; + unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({inputBatchSize, inputChannels, inputHeight, inputWidth}, + armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({outputBatchSize, outputChannels, outputHeight, outputWidth}, + armnn::GetDataType()); + armnn::TensorInfo kernelDesc({depthMultiplier, inputChannels, kernelHeight, kernelWidth}, armnn::GetDataType()); + armnn::TensorInfo biasDesc({outputChannels}, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + kernelDesc.SetQuantizationScale(qScale); + kernelDesc.SetQuantizationOffset(qOffset); + biasDesc.SetQuantizationScale(qScale*qScale); + biasDesc.SetQuantizationOffset(0); + } + + auto input = MakeTensor(inputTensorInfo, std::vector( + QuantizedVector(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + std::vector 
biasV(QuantizedVector(biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + {0, 2, 1, -1})); + auto bias = MakeTensor(biasDesc, biasV); + + auto kernel = MakeTensor(kernelDesc, std::vector( + QuantizedVector(kernelDesc.GetQuantizationScale(), kernelDesc.GetQuantizationOffset(), { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + 0, 0, 0, + 0, -1, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 0, 0, 0, + 0, 0, 0, + 0, 1, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // Manually calculated. + std::vector outputImage = std::vector( + QuantizedVector(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { + 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, 3.5f, + 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, + 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, + 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, + 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, 6.5f, + 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, 5.0f, + + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, -0.5f, + + 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 10.0f, 10.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 8.0f, 8.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f + })); + + // Optionally apply bias to output image. + if(biasEnabled) + { + ApplyBias(outputImage, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), + biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(), + outputWidth, outputHeight); + } + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, outputImage); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled. 
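+ // (Worked shape check, added for clarity.) With strideX 2, strideY 1 and one row of padding at
+ // the top and bottom (set just below), the 16 (W) x 8 (H) input and 3 (W) x 5 (H) kernel give
+ //     outputWidth  = (16 - 3) / 2 + 1 = 7
+ //     outputHeight = (8 + 1 + 1 - 5) + 1 = 6
+ // which matches the 6x7 per-channel blocks in the expected output above.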
+ data.m_Parameters.m_StrideX = 2; + data.m_Parameters.m_StrideY = 1; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = 1; + data.m_Parameters.m_PadBottom = 1; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template +LayerTestResult Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + using B = typename FullyConnectedBiasTypeForInputType::Type; + + // Until we have a specialist 1D convolution layer, we can fake one using + // 2D convolution with the final dimension set to 1. + // I don't anticipate this being particularly slow, given that convolution is implemented + // as a matrix multiplication, at which point dimension doesn't matter. + + unsigned int batchSize = 1; + unsigned int inputChannels = 2; + unsigned int outputChannels = 3; + unsigned int inputSize = 5; // The 1D size (could view as 'width' or 'height'). + unsigned int kernelSize = 3; + unsigned int padSize = 2; + unsigned int stride = 1; + unsigned int outputSize = 7; // (inputSize + 2 * padSize - kernelSize + 1) / stride. + + armnn::TensorInfo inputInfo({batchSize, inputChannels, inputSize, 1}, armnn::GetDataType()); + armnn::TensorInfo outputInfo({batchSize, outputChannels, outputSize, 1}, armnn::GetDataType()); + armnn::TensorInfo kernelInfo({outputChannels, inputChannels, kernelSize, 1}, armnn::GetDataType()); + armnn::TensorInfo biasInfo({outputChannels}, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputInfo.SetQuantizationScale(qScale); + inputInfo.SetQuantizationOffset(qOffset); + outputInfo.SetQuantizationScale(qScale); + outputInfo.SetQuantizationOffset(qOffset); + kernelInfo.SetQuantizationScale(qScale); + kernelInfo.SetQuantizationOffset(qOffset); + biasInfo.SetQuantizationScale(inputInfo.GetQuantizationScale()*kernelInfo.GetQuantizationScale()); + biasInfo.SetQuantizationOffset(0); + } + + std::vector inputData( + QuantizedVector(inputInfo.GetQuantizationScale(), inputInfo.GetQuantizationOffset(), { + 5.0f, -2.0f, 2.5f, 0.0f, 1.0f, + -3.0f, 3.2f, 5.0f, 2.0f, 3.0f, + })); + + std::vector kernelData( + QuantizedVector(kernelInfo.GetQuantizationScale(), kernelInfo.GetQuantizationOffset(), { + 1.0f, 0.0f, 0.0f, + 0.0f, 2.0f, -1.5f, + + 0.0f, 0.0f, 0.0f, + 0.2f, 0.2f, 0.2f, + + 0.5f, 0.0f, 0.5f, + 0.0f, -1.0f, 0.0f + })); + + std::vector biasData( + QuantizedVector(biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), { + 1.0f, 0.0f, 0.0f + })); + + std::vector outputData( + QuantizedVector(outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), { + 4.5f, -10.8f, 5.0f + 6.4f - 7.5f, -2.0f + 10.0f -3.0f, 2.5f + 4.0f - 4.5f, 6.0f, 1.0f, + -0.6f, -0.6f + 0.64f, -0.6f + 0.64f + 1.0f, 0.64f + 1.0f + 0.4f, 1.0f + 0.4f + 0.6f, 0.4f + 0.6f, 0.6f, + 2.5f, -1.0f + 3.0f, 1.25f - 3.2f + 2.5f, -1.0f - 5.0f, 1.25f + 0.5f - 2.0f, -3.0f, 0.5f + })); + + // Optionally apply bias to output image. 
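+ // (Clarifying note.) Because the 1D convolution is faked as a 2D convolution with a trailing
+ // dimension of 1, the bias is applied with width 1 and height outputSize, i.e. one bias value
+ // per output channel across all 7 output positions, where
+ //     outputSize = (inputSize + 2 * padSize - kernelSize) / stride + 1 = (5 + 4 - 3) / 1 + 1 = 7.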
+ if(biasEnabled) + { + ApplyBias(outputData, outputInfo.GetQuantizationScale(), outputInfo.GetQuantizationOffset(), + biasData, biasInfo.GetQuantizationScale(), biasInfo.GetQuantizationOffset(), + 1, outputSize); + } + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelInfo); + armnn::ScopedCpuTensorHandle biasTensor(biasInfo); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, kernelData.data()); + AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data()); + + AddInputToWorkload(data, info, inputInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputInfo, outputHandle.get()); + + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = 1; + data.m_Parameters.m_StrideY = stride; + data.m_Parameters.m_PadLeft = 0; + data.m_Parameters.m_PadRight = 0; + data.m_Parameters.m_PadTop = padSize; + data.m_Parameters.m_PadBottom = padSize; + data.m_Parameters.m_BiasEnabled = biasEnabled; + + std::unique_ptr workload = workloadFactory.CreateConvolution2d(data, info); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), inputData.data()); + + workloadFactory.Finalize(); + workload->Execute(); + + // Output + LayerTestResult ret(outputInfo); + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + ret.outputExpected = MakeTensor(outputInfo, outputData); + return ret; +} + + + +template +LayerTestResult CompareConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 3; + unsigned int inputNum = 5; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + + unsigned int strideX = 2; + unsigned int strideY = 3; + unsigned int padX = 1; + unsigned int padY = 1; + + unsigned int outputNum = inputNum; + unsigned int outputChannels = 2; + unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; + unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo kernelDesc; + armnn::TensorInfo biasDesc; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + unsigned int kernelShape[] = {outputChannels, inputChannels, kernelHeight, kernelWidth}; + unsigned int biasShape[] = {outputChannels}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType()); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType()); + kernelDesc = armnn::TensorInfo(4, kernelShape, armnn::GetDataType()); + biasDesc = armnn::TensorInfo(1, biasShape, armnn::GetDataType()); + + LayerTestResult ret(outputTensorInfo); + + auto input = MakeRandomTensor(inputTensorInfo, 124908); + auto kernel = MakeRandomTensor(kernelDesc, 891234); + auto bias = MakeRandomTensor(biasDesc, 1028); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Convolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + 
armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_BiasEnabled = true; + + std::unique_ptr outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::Convolution2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr workload = workloadFactory.CreateConvolution2d(data, info); + std::unique_ptr workloadRef = refWorkloadFactory.CreateConvolution2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + refWorkloadFactory.Finalize(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +template +LayerTestResult CompareDepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int inputHeight = 8; + unsigned int inputWidth = 16; + unsigned int inputChannels = 3; + unsigned int inputNum = 5; + + unsigned int kernelHeight = 3; + unsigned int kernelWidth = 3; + unsigned int channelMultiplier = 1; + + unsigned int strideX = 2; + unsigned int strideY = 3; + unsigned int padX = 1; + unsigned int padY = 1; + + unsigned int outputNum = inputNum; + unsigned int outputChannels = inputChannels * channelMultiplier; + unsigned int outputHeight = (inputHeight + 2 * padY - kernelHeight + strideY) / strideY; + unsigned int outputWidth = (inputWidth + 2 * padX - kernelWidth + strideX) / strideX; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo kernelDesc; + armnn::TensorInfo biasDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + unsigned int kernelShape[] = { channelMultiplier, inputChannels, kernelHeight, kernelWidth }; + unsigned int biasShape[] = { outputChannels }; + + float inputsQScale = armnn::IsQuantizedType() ? 1.0f : 0; + float outputQScale = armnn::IsQuantizedType() ? 
2.0f : 0; + int32_t qOffset = 0; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType(), inputsQScale, qOffset); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType(), outputQScale, qOffset); + kernelDesc = armnn::TensorInfo(4, kernelShape, armnn::GetDataType(), inputsQScale, qOffset); + biasDesc = armnn::TensorInfo(1, biasShape, armnn::GetBiasDataType(armnn::GetDataType()), inputsQScale, qOffset); + + LayerTestResult ret(outputTensorInfo); + + auto input = MakeRandomTensor(inputTensorInfo, 124908, 0.0f, 255.0f); + auto kernel = MakeRandomTensor(kernelDesc, 891234, 0.0f, 255.0f); + auto bias = MakeRandomTensor::Type, 1>(biasDesc, 1028, 0.0f, 255.0f); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_BiasEnabled = true; + + std::unique_ptr outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::DepthwiseConvolution2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr workload = workloadFactory.CreateDepthwiseConvolution2d(data, info); + std::unique_ptr workloadRef = refWorkloadFactory.CreateDepthwiseConvolution2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + refWorkloadFactory.Finalize(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} diff --git a/src/backends/test/ConvertFp16ToFp32TestImpl.hpp b/src/backends/test/ConvertFp16ToFp32TestImpl.hpp new file mode 100644 index 0000000000..b75879dea6 --- /dev/null +++ b/src/backends/test/ConvertFp16ToFp32TestImpl.hpp @@ -0,0 +1,55 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include + +#include +#include + +#include + +#include + +LayerTestResult SimpleConvertFp16ToFp32Test(armnn::IWorkloadFactory& workloadFactory) +{ + using namespace half_float::literal; + + const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16); + const armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, + { -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h, + 1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h }); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, + { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, + 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ConvertFp16ToFp32QueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateConvertFp16ToFp32(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} diff --git a/src/backends/test/ConvertFp32ToFp16TestImpl.hpp b/src/backends/test/ConvertFp32ToFp16TestImpl.hpp new file mode 100644 index 0000000000..1325b4b054 --- /dev/null +++ b/src/backends/test/ConvertFp32ToFp16TestImpl.hpp @@ -0,0 +1,55 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include + +#include +#include + +#include + +#include + +LayerTestResult SimpleConvertFp32ToFp16Test(armnn::IWorkloadFactory& workloadFactory) +{ + using namespace half_float::literal; + + const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16); + + auto input = MakeTensor(inputTensorInfo, + { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, + 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, + { -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h, + 1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h }); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ConvertFp32ToFp16QueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateConvertFp32ToFp16(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} \ No newline at end of file diff --git a/src/backends/test/CreateWorkloadCl.cpp b/src/backends/test/CreateWorkloadCl.cpp new file mode 100644 index 0000000000..af3192cae2 --- /dev/null +++ b/src/backends/test/CreateWorkloadCl.cpp @@ -0,0 +1,564 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "backends/ClWorkloadFactory.hpp" +#include "backends/RefWorkloadFactory.hpp" +#include "backends/MemCopyWorkload.hpp" +#include "backends/ClWorkloads/ClWorkloadUtils.hpp" +#include "backends/ClWorkloads.hpp" +#include "backends/ClTensorHandle.hpp" +#include "ClContextControlFixture.hpp" + +#include "test/CreateWorkloadClNeon.hpp" + +boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle, + std::initializer_list expectedDimensions) +{ + return CompareTensorHandleShape(tensorHandle, expectedDimensions); +} + +BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture) + +template +static void ClCreateActivationWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + + auto workload = CreateActivationWorkloadTest(factory, graph); + + // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest). 
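+ // boost::polymorphic_downcast is appropriate here because the CL factory is known to produce IClTensorHandles:
+ // it checks the cast with dynamic_cast in debug builds and compiles down to a static_cast in release builds.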
+ ActivationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1})); +} + +BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload) +{ + ClCreateActivationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload) +{ + ClCreateActivationWorkloadTest(); +} + +template +static void ClCreateArithmethicWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateArithmeticWorkloadTest(factory, graph); + + // Checks that inputs/outputs are as we expect them (see definition of CreateArithmeticWorkloadTest). + DescriptorType queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3})); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) +{ + ClCreateArithmethicWorkloadTest, + AdditionQueueDescriptor, + AdditionLayer, + armnn::DataType::Float32>(); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload) +{ + ClCreateArithmethicWorkloadTest, + AdditionQueueDescriptor, + AdditionLayer, + armnn::DataType::Float16>(); +} + +BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload) +{ + ClCreateArithmethicWorkloadTest, + SubtractionQueueDescriptor, + SubtractionLayer, + armnn::DataType::Float32>(); +} + +BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload) +{ + ClCreateArithmethicWorkloadTest, + SubtractionQueueDescriptor, + SubtractionLayer, + armnn::DataType::Float16>(); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest) +{ + ClCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest) +{ + ClCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest) +{ + ClCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest) +{ + ClCreateArithmethicWorkloadTest(); +} + +template +static void ClCreateBatchNormalizationWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + + auto workload = CreateBatchNormalizationWorkloadTest + (factory, graph); + + // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). 
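+ // Mean, variance, beta and gamma are carried by dedicated descriptor members (m_Mean, m_Variance, m_Beta,
+ // m_Gamma) rather than as queue inputs, so only the data tensor appears in m_Inputs below.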
+ BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 1, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3, 1, 1})); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatWorkload) +{ + ClCreateBatchNormalizationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16Workload) +{ + ClCreateBatchNormalizationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload) +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateConvertFp16ToFp32WorkloadTest(factory, graph); + + ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3})); + BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16)); + BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32)); +} + +BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload) +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateConvertFp32ToFp16WorkloadTest(factory, graph); + + ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3})); + BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32)); + BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16)); +} + +template +static void ClConvolution2dWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest + (factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). + Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 8, 16})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 2, 10})); +} + +BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatWorkload) +{ + ClConvolution2dWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16Workload) +{ + ClConvolution2dWorkloadTest(); +} + + +template +static void ClDirectConvolution2dWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateDirectConvolution2dWorkloadTest( + factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest). 
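+ // The direct-convolution helper uses same-padded shapes, so the 6x6 spatial size is preserved from input to
+ // output in the checks below; only the channel count changes.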
+ Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6})); +} + +BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload) +{ + ClDirectConvolution2dWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload) +{ + ClDirectConvolution2dWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload) +{ + ClDirectConvolution2dWorkloadTest(); +} + +template +static void ClCreateFullyConnectedWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = + CreateFullyConnectedWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). + FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7})); +} + + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest) +{ + ClCreateFullyConnectedWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest) +{ + ClCreateFullyConnectedWorkloadTest(); +} + +template +static void ClNormalizationWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + + auto workload = CreateNormalizationWorkloadTest + (factory, graph); + + // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest). + NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 5, 5, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 5, 5, 1})); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationFloatWorkload) +{ + ClNormalizationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16Workload) +{ + ClNormalizationWorkloadTest(); +} + +template +static void ClPooling2dWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + + auto workload = CreatePooling2dWorkloadTest(factory, graph); + + // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest). + Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 5, 5})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 2, 4})); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dFloatWorkload) +{ + ClPooling2dWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload) +{ + ClPooling2dWorkloadTest(); +} + +template +static void ClCreateReshapeWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + + auto workload = CreateReshapeWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). 
+ ReshapeQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4})); // Leading size 1 dimensions are collapsed by ACL. +} + +BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload) +{ + ClCreateReshapeWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload) +{ + ClCreateReshapeWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) +{ + ClCreateReshapeWorkloadTest(); +} + +template +static void ClSoftmaxWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + + auto workload = CreateSoftmaxWorkloadTest(factory, graph); + + // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload). + SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1})); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1})); +} + + +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkloadTest) +{ + ClSoftmaxWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest) +{ + ClSoftmaxWorkloadTest(); +} + +template +static void ClSplitterWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + + auto workload = CreateSplitterWorkloadTest(factory, graph); + + // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). + SplitterQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7})); + + auto outputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[1]); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7})); + + auto outputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[2]); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7})); + + auto outputHandle0 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + // NOTE: At the moment the CL collapses the tensor to a 2 dim when dimension zero = 1 + // we are raising this difference between the NEON and CL libs as an issue with the compute library team. + BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {7, 7})); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload) +{ + ClSplitterWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload) +{ + ClSplitterWorkloadTest(); +} + +template +static void ClSplitterMergerTest() +{ + // Tests that it is possible to decide which output of the splitter layer + // should be lined to which input of the merger layer. + // We test that is is possible to specify 0th output + // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be 0th input + // of the merger. + + Graph graph; + ClWorkloadFactory factory; + + auto workloads = + CreateSplitterMergerWorkloadTest + (factory, graph); + + auto wlSplitter = std::move(workloads.first); + auto wlMerger = std::move(workloads.second); + + //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. 
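+ // dynamic_cast (rather than polymorphic_downcast) is used on purpose: a null result would mean the handles
+ // are not ClSubTensorHandles at all, and the BOOST_TEST checks below would flag that.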
+ armnn::ClSubTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); + armnn::ClSubTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); + armnn::ClSubTensorHandle* mIn0 = dynamic_cast(wlMerger->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* mIn1 = dynamic_cast(wlMerger->GetData().m_Inputs[1]); + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(mIn0); + BOOST_TEST(mIn1); + + //Fliped order of inputs/outputs. + bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); + BOOST_TEST(validDataPointers); + + + //Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor. + bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent()) + && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent()); + + BOOST_TEST(validSubTensorParents); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloatWorkload) +{ + ClSplitterMergerTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat16Workload) +{ + ClSplitterMergerTest(); +} + + +BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) +{ + // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. + // We create a splitter with two outputs. That each of those outputs is used by two different activation layers. + + Graph graph; + ClWorkloadFactory factory; + std::unique_ptr wlSplitter; + std::unique_ptr wlActiv0_0; + std::unique_ptr wlActiv0_1; + std::unique_ptr wlActiv1_0; + std::unique_ptr wlActiv1_1; + + CreateSplitterMultipleInputsOneOutputWorkloadTest(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, + wlActiv1_0, wlActiv1_1); + + //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. + armnn::ClSubTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); + armnn::ClSubTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); + armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast(wlActiv0_0->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast(wlActiv0_1->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast(wlActiv1_0->GetData().m_Inputs[0]); + armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast(wlActiv1_1->GetData().m_Inputs[0]); + + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(activ0_0Im); + BOOST_TEST(activ0_1Im); + BOOST_TEST(activ1_0Im); + BOOST_TEST(activ1_1Im); + + bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && + (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl) +{ + ClWorkloadFactory factory; + CreateMemCopyWorkloads(factory); +} + +BOOST_AUTO_TEST_CASE(CreateL2NormalizationWorkload) +{ + Graph graph; + ClWorkloadFactory factory; + + auto workload = CreateL2NormalizationWorkloadTest + (factory, graph); + + // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest). 
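+ // (The helper actually used here is CreateL2NormalizationWorkloadTest; the 4D shape below comes from it.)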
+ L2NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 5, 20, 50, 67 })); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 5, 20, 50, 67 })); +} + +template +static void ClCreateLstmWorkloadTest() +{ + Graph graph; + ClWorkloadFactory factory; + auto workload = CreateLstmWorkloadTest(factory, graph); + + LstmQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[1]); + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 })); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 })); +} + +BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload) +{ + ClCreateLstmWorkloadTest(); +} + + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/CreateWorkloadNeon.cpp b/src/backends/test/CreateWorkloadNeon.cpp new file mode 100644 index 0000000000..fbe064e1c4 --- /dev/null +++ b/src/backends/test/CreateWorkloadNeon.cpp @@ -0,0 +1,455 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "backends/NeonWorkloadFactory.hpp" +#include "backends/NeonWorkloadUtils.hpp" +#include "backends/NeonWorkloads.hpp" +#include "backends/MemCopyWorkload.hpp" +#include "backends/NeonTensorHandle.hpp" + +#include "test/CreateWorkloadClNeon.hpp" + +BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon) + +namespace +{ + +bool TestNeonTensorHandleInfo(armnn::INeonTensorHandle* handle, const armnn::TensorInfo& expectedInfo) +{ + using namespace armnn::armcomputetensorutils; + + const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info(); + const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo); + + if (handleInfo->data_type() != expectedAclInfo.data_type()) + { + return false; + } + + if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions()) + { + return false; + } + + if (handleInfo->quantization_info() != expectedAclInfo.quantization_info()) + { + return false; + } + + for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d) + { + if (handleInfo->dimension(d) != expectedAclInfo.dimension(d)) + { + return false; + } + } + + return true; +} + +} // namespace + +template +static void NeonCreateActivationWorkloadTest() +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateActivationWorkloadTest + (factory, graph); + + // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest). 
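+ // Unlike the CL tests above, which only compare shapes, TestNeonTensorHandleInfo (defined at the top of this
+ // file) also verifies the ACL data type and quantization info of each handle.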
+ ActivationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload) +{ + NeonCreateActivationWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload) +{ + NeonCreateActivationWorkloadTest(); +} + +template +static void NeonCreateArithmethicWorkloadTest() +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateArithmeticWorkloadTest(factory, graph); + + DescriptorType queueDescriptor = workload->GetData(); + auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto inputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload) +{ + NeonCreateArithmethicWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) +{ + NeonCreateArithmethicWorkloadTest(); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload) +{ + NeonCreateArithmethicWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload) +{ + NeonCreateArithmethicWorkloadTest(); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload) +{ + NeonCreateArithmethicWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload) +{ + NeonCreateArithmethicWorkloadTest(); +} + +template +static void NeonCreateBatchNormalizationWorkloadTest() +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateBatchNormalizationWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). + BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 1, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3, 1, 1}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16Workload) +{ + NeonCreateBatchNormalizationWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatWorkload) +{ + NeonCreateBatchNormalizationWorkloadTest(); +} + +template +static void NeonCreateConvolution2dWorkloadTest() +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). 
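+ // Same shapes as the CL convolution test, so the two backends exercise workload creation like for like.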
+ Convolution2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 8, 16}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 2, 2, 10}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16Workload) +{ + NeonCreateConvolution2dWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatWorkload) +{ + NeonCreateConvolution2dWorkloadTest(); +} + +template +static void NeonCreateFullyConnectedWorkloadTest() +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateFullyConnectedWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). + FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload) +{ + NeonCreateFullyConnectedWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload) +{ + NeonCreateFullyConnectedWorkloadTest(); +} + +template +static void NeonCreateNormalizationWorkloadTest() +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateNormalizationWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest). + NormalizationQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 5, 5, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 5, 5, 1}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16Workload) +{ + NeonCreateNormalizationWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateNormalizationFloatWorkload) +{ + NeonCreateNormalizationWorkloadTest(); +} + +template +static void NeonCreatePooling2dWorkloadTest() +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreatePooling2dWorkloadTest + (factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest). 
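+ // Pooling is also exercised for QuantisedAsymm8 below, as NEON pooling supports uint8 as well as float.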
+ Pooling2dQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 2, 5, 5}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 2, 2, 4}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload) +{ + NeonCreatePooling2dWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreatePooling2dFloatWorkload) +{ + NeonCreatePooling2dWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dUint8Workload) +{ + NeonCreatePooling2dWorkloadTest(); +} + +template +static void NeonCreateReshapeWorkloadTest() +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateReshapeWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). + ReshapeQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload) +{ + NeonCreateReshapeWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload) +{ + NeonCreateReshapeWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) +{ + NeonCreateReshapeWorkloadTest(); +} + +template +static void NeonCreateSoftmaxWorkloadTest() +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateSoftmaxWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest). + SoftmaxQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType))); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType))); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload) +{ + NeonCreateSoftmaxWorkloadTest(); +} +#endif + +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload) +{ + NeonCreateSoftmaxWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterWorkload) +{ + Graph graph; + NeonWorkloadFactory factory; + auto workload = CreateSplitterWorkloadTest(factory, graph); + + // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). 
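+ // The {5, 7, 7} input is split along its first dimension into views of size 1, 2 and 2 (1 + 2 + 2 = 5),
+ // which is exactly what the shape checks below expect.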
+ SplitterQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32))); + + auto outputHandle0 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32))); + + auto outputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[1]); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32))); + + auto outputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[2]); + BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32))); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMerger) +{ + // Tests that it is possible to decide which output of the splitter layer + // should be lined to which input of the merger layer. + // We tested that is is possible to specify 0th output + // of the splitter to be the 1st input to the merger, and the 1st output of the splitter to be 0th input + // of the merger. + + Graph graph; + NeonWorkloadFactory factory; + + auto workloads = + CreateSplitterMergerWorkloadTest(factory, graph); + + auto wlSplitter = std::move(workloads.first); + auto wlMerger = std::move(workloads.second); + + //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. + armnn::INeonTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); + armnn::INeonTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); + armnn::INeonTensorHandle* mIn0 = dynamic_cast(wlMerger->GetData().m_Inputs[0]); + armnn::INeonTensorHandle* mIn1 = dynamic_cast(wlMerger->GetData().m_Inputs[1]); + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(mIn0); + BOOST_TEST(mIn1); + + bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs) +{ + // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. + // We created a splitter with two outputs. 
That each of those outputs is used by two different activation layers + + Graph graph; + NeonWorkloadFactory factory; + std::unique_ptr wlSplitter; + std::unique_ptr wlActiv0_0; + std::unique_ptr wlActiv0_1; + std::unique_ptr wlActiv1_0; + std::unique_ptr wlActiv1_1; + + CreateSplitterMultipleInputsOneOutputWorkloadTest(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, + wlActiv1_0, wlActiv1_1); + + armnn::INeonTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); + armnn::INeonTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); + armnn::INeonTensorHandle* activ0_0Im = dynamic_cast(wlActiv0_0->GetData().m_Inputs[0]); + armnn::INeonTensorHandle* activ0_1Im = dynamic_cast(wlActiv0_1->GetData().m_Inputs[0]); + armnn::INeonTensorHandle* activ1_0Im = dynamic_cast(wlActiv1_0->GetData().m_Inputs[0]); + armnn::INeonTensorHandle* activ1_1Im = dynamic_cast(wlActiv1_1->GetData().m_Inputs[0]); + + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(activ0_0Im); + BOOST_TEST(activ0_1Im); + BOOST_TEST(activ1_0Im); + BOOST_TEST(activ1_1Im); + + bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && + (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon) +{ + NeonWorkloadFactory factory; + CreateMemCopyWorkloads(factory); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/CreateWorkloadRef.cpp b/src/backends/test/CreateWorkloadRef.cpp new file mode 100644 index 0000000000..41419dafd0 --- /dev/null +++ b/src/backends/test/CreateWorkloadRef.cpp @@ -0,0 +1,478 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "backends/RefWorkloadFactory.hpp" +#include "backends/RefWorkloads.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "test/CreateWorkload.hpp" + +namespace +{ + +template +void CheckInputOutput(std::unique_ptr workload, const TensorInfo& inputInfo, const TensorInfo& outputInfo) +{ + auto queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST((inputHandle->GetTensorInfo() == inputInfo)); + BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo)); +} + +template +void CheckInputsOutput(std::unique_ptr workload, + const TensorInfo& inputInfo0, + const TensorInfo& inputInfo1, + const TensorInfo& outputInfo) +{ + auto queueDescriptor = workload->GetData(); + auto inputHandle0 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + auto inputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Inputs[1]); + auto outputHandle = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST((inputHandle0->GetTensorInfo() == inputInfo0)); + BOOST_TEST((inputHandle1->GetTensorInfo() == inputInfo1)); + BOOST_TEST((outputHandle->GetTensorInfo() == outputInfo)); +} +} + +BOOST_AUTO_TEST_SUITE(CreateWorkloadRef) + +template +static void RefCreateActivationWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateActivationWorkloadTest(factory, graph); + + // Checks that outputs are as we expect them (see definition of CreateActivationWorkloadTest). 
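+ // CheckInputOutput (defined in the anonymous namespace above) downcasts both handles to CpuTensorHandle and
+ // compares their full TensorInfo, not just the shape.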
+ CheckInputOutput(std::move(workload), + TensorInfo({ 1, 1 }, DataType), + TensorInfo({ 1, 1 }, DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateActivationFloat32Workload) +{ + RefCreateActivationWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateActivationUint8Workload) +{ + RefCreateActivationWorkloadTest(); +} + +template +static void RefCreateArithmethicWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateArithmeticWorkloadTest(factory, graph); + + CheckInputsOutput(std::move(workload), + TensorInfo({ 2, 3 }, DataType), + TensorInfo({ 2, 3 }, DataType), + TensorInfo({ 2, 3 }, DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload) +{ + RefCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateAdditionUint8Workload) +{ + RefCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload) +{ + RefCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSubtractionUint8Workload) +{ + RefCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload) +{ + RefCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload) +{ + RefCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkload) +{ + RefCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateDivisionUint8Workload) +{ + RefCreateArithmethicWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateBatchNormalizationWorkloadTest + (factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). + CheckInputOutput( + std::move(workload), TensorInfo({2, 3, 1, 1}, DataType::Float32), TensorInfo({2, 3, 1, 1}, DataType::Float32)); +} + +BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Float32Workload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateConvertFp16ToFp32WorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them + CheckInputOutput( + std::move(workload), TensorInfo({1, 3, 2, 3}, DataType::Float16), TensorInfo({1, 3, 2, 3}, DataType::Float32)); +} + +BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Float16Workload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateConvertFp32ToFp16WorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them + CheckInputOutput( + std::move(workload), TensorInfo({1, 3, 2, 3}, DataType::Float32), TensorInfo({1, 3, 2, 3}, DataType::Float16)); +} + +BOOST_AUTO_TEST_CASE(CreateConvolution2dWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateConvolution2dWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). + CheckInputOutput(std::move(workload), + TensorInfo({2, 3, 8, 16}, DataType::Float32), + TensorInfo({2, 2, 2, 10}, DataType::Float32)); +} + +BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolution2dWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = + CreateDepthwiseConvolution2dWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest). 
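+ // For the depthwise case the output has 9 channels rather than 2: each of the 3 input channels is expanded by
+ // the helper's channel multiplier (3 here, judging by the shapes), as the check below shows.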
+ CheckInputOutput(std::move(workload), + TensorInfo({2, 3, 8, 16}, DataType::Float32), + TensorInfo({2, 9, 2, 10}, DataType::Float32)); +} + +template +static void RefCreateFullyConnectedWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateFullyConnectedWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). + float inputsQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 1.0f : 0.0; + float outputQScale = DataType == armnn::DataType::QuantisedAsymm8 ? 2.0f : 0.0; + CheckInputOutput(std::move(workload), + TensorInfo({ 3, 1, 4, 5 }, DataType, inputsQScale), + TensorInfo({ 3, 7 }, DataType, outputQScale)); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat32Workload) +{ + RefCreateFullyConnectedWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateFullyConnectedUint8Workload) +{ + RefCreateFullyConnectedWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateNormalizationWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest). + CheckInputOutput(std::move(workload), + TensorInfo({3, 5, 5, 1}, DataType::Float32), + TensorInfo({3, 5, 5, 1}, DataType::Float32)); +} + +template +static void RefCreatePooling2dWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreatePooling2dWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest). + CheckInputOutput( + std::move(workload), + TensorInfo({3, 2, 5, 5}, DataType), + TensorInfo({3, 2, 2, 4}, DataType)); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dFloat32Workload) +{ + RefCreatePooling2dWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreatePooling2dUint8Workload) +{ + RefCreatePooling2dWorkloadTest(); +} + +template +static void RefCreateSoftmaxWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateSoftmaxWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest). + CheckInputOutput( + std::move(workload), + TensorInfo({4, 1}, DataType), + TensorInfo({4, 1}, DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat32Workload) +{ + RefCreateSoftmaxWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSoftmaxUint8Workload) +{ + RefCreateSoftmaxWorkloadTest(); +} + +template +static void RefCreateSplitterWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateSplitterWorkloadTest(factory, graph); + + // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest). 
+ SplitterQueueDescriptor queueDescriptor = workload->GetData(); + auto inputHandle = boost::polymorphic_downcast(queueDescriptor.m_Inputs[0]); + BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 5, 7, 7 }, DataType))); + + auto outputHandle0 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[0]); + BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 7, 7 }, DataType))); + + auto outputHandle1 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[1]); + BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType))); + + auto outputHandle2 = boost::polymorphic_downcast(queueDescriptor.m_Outputs[2]); + BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, DataType))); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterFloat32Workload) +{ + RefCreateSplitterWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterUint8Workload) +{ + RefCreateSplitterWorkloadTest(); +} + +template +static void RefCreateSplitterMergerWorkloadTest() +{ + // Tests that it is possible to decide which output of the splitter layer + // should be lined to which input of the merger layer. + // We tested that is is possible to specify 0th output + // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be 0th input + // of the merger. + + Graph graph; + RefWorkloadFactory factory; + auto workloads = CreateSplitterMergerWorkloadTest + (factory, graph); + + auto wlSplitter = std::move(workloads.first); + auto wlMerger = std::move(workloads.second); + + //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction. + armnn::CpuTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); + armnn::CpuTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); + armnn::CpuTensorHandle* mIn0 = dynamic_cast(wlMerger->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* mIn1 = dynamic_cast(wlMerger->GetData().m_Inputs[1]); + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(mIn0); + BOOST_TEST(mIn1); + + bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat32) +{ + RefCreateSplitterMergerWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSplitterMergerUint8) +{ + RefCreateSplitterMergerWorkloadTest(); +} + +template +static void RefCreateSingleOutputMultipleInputsTest() +{ + // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer. + // We created a splitter with two outputs. That each of those outputs is used by two different activation layers. 
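+ // The four activations below therefore share the splitter outputs pairwise: wlActiv0_0/wlActiv0_1 read
+ // output 0 and wlActiv1_0/wlActiv1_1 read output 1, which is what validDataPointers verifies.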
+ + Graph graph; + RefWorkloadFactory factory; + std::unique_ptr wlSplitter; + std::unique_ptr wlActiv0_0; + std::unique_ptr wlActiv0_1; + std::unique_ptr wlActiv1_0; + std::unique_ptr wlActiv1_1; + + CreateSplitterMultipleInputsOneOutputWorkloadTest(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1, wlActiv1_0, wlActiv1_1); + + armnn::CpuTensorHandle* sOut0 = dynamic_cast(wlSplitter->GetData().m_Outputs[0]); + armnn::CpuTensorHandle* sOut1 = dynamic_cast(wlSplitter->GetData().m_Outputs[1]); + armnn::CpuTensorHandle* activ0_0Im = dynamic_cast(wlActiv0_0->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* activ0_1Im = dynamic_cast(wlActiv0_1->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* activ1_0Im = dynamic_cast(wlActiv1_0->GetData().m_Inputs[0]); + armnn::CpuTensorHandle* activ1_1Im = dynamic_cast(wlActiv1_1->GetData().m_Inputs[0]); + + + BOOST_TEST(sOut0); + BOOST_TEST(sOut1); + BOOST_TEST(activ0_0Im); + BOOST_TEST(activ0_1Im); + BOOST_TEST(activ1_0Im); + BOOST_TEST(activ1_1Im); + + bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) && + (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im); + + BOOST_TEST(validDataPointers); +} + +BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsFloat32) +{ + RefCreateSingleOutputMultipleInputsTest(); +} + +BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputsUint8) +{ + RefCreateSingleOutputMultipleInputsTest(); +} + +template +static void RefCreateResizeBilinearTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateResizeBilinearWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest). + CheckInputOutput( + std::move(workload), + TensorInfo({ 2, 3, 4, 4 }, DataType), + TensorInfo({ 2, 3, 2, 2 }, DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat32) +{ + RefCreateResizeBilinearTest(); +} + +BOOST_AUTO_TEST_CASE(CreateResizeBilinearUint8) +{ + RefCreateResizeBilinearTest(); +} + +BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat32) +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateL2NormalizationWorkloadTest + (factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest). + CheckInputOutput( + std::move(workload), + TensorInfo({ 5, 20, 50, 67 }, armnn::DataType::Float32), + TensorInfo({ 5, 20, 50, 67 }, armnn::DataType::Float32)); +} + +template +static void RefCreateReshapeWorkloadTest() +{ + Graph graph; + RefWorkloadFactory factory; + auto workload = CreateReshapeWorkloadTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest). + CheckInputOutput( + std::move(workload), + TensorInfo({ 4, 1 }, DataType), + TensorInfo({ 1, 4 }, DataType)); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeFloat32Workload) +{ + RefCreateReshapeWorkloadTest(); +} + +BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload) +{ + RefCreateReshapeWorkloadTest(); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/FullyConnectedTestImpl.hpp b/src/backends/test/FullyConnectedTestImpl.hpp new file mode 100644 index 0000000000..125b7e62b1 --- /dev/null +++ b/src/backends/test/FullyConnectedTestImpl.hpp @@ -0,0 +1,287 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +template +LayerTestResult SimpleFullyConnectedTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::TensorInfo inputTensorInfo, + armnn::TensorInfo outputTensorInfo, + armnn::TensorInfo weightsDesc, + armnn::TensorInfo biasesDesc, + boost::multi_array& weights, + boost::multi_array& bias, + boost::multi_array& input, + bool biasEnabled, + bool transposeWeights) +{ + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::FullyConnectedQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle weightsTensor(weightsDesc); + armnn::ScopedCpuTensorHandle biasTensor(biasesDesc); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, &weights[0][0]); + AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_BiasEnabled = biasEnabled; + data.m_Parameters.m_TransposeWeightMatrix = transposeWeights; + + std::unique_ptr workload = workloadFactory.CreateFullyConnected(data, info); + LayerTestResult result(outputTensorInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get()); + + return result; +} + +LayerTestResult FullyConnectedFloat32Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled, + bool transposeWeights) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 2; + + unsigned int outputChannels = 3; + unsigned int outputNum = 2; + + // Define the tensor descriptors. 
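+ // When transposeWeights is set, the weight matrix is stored as [outputChannels, inputChannels] instead of
+ // [inputChannels, outputChannels]; both layouts below describe the same matrix, so the expected output is unchanged.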
+ armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo weightsDesc; + armnn::TensorInfo biasesDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels }; + unsigned int weightsShape[] = { inputChannels, outputChannels }; + if (transposeWeights) + { + std::swap(weightsShape[0], weightsShape[1]); + } + unsigned int biasShape[] = { outputChannels }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::DataType::Float32); + weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::DataType::Float32); + biasesDesc = armnn::TensorInfo(1, biasShape, armnn::DataType::Float32); + + LayerTestResult result(outputTensorInfo); + + boost::multi_array input = MakeTensor(inputTensorInfo, std::vector( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + + 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + }) + ); + + boost::multi_array weights = MakeTensor(weightsDesc, std::vector( + { + .5f, 2.f, .5f, + .5f, 2.f, 1.f, + .5f, 2.f, 2.f, + .5f, 2.f, 3.f, + .5f, 2.f, 4.f + })); + + if (transposeWeights) + { + weights = MakeTensor(weightsDesc, std::vector( + { + .5f, .5f, .5f, .5f, .5f, + 2.f, 2.f, 2.f, 2.f, 2.f, + .5f, 1.f, 2.f, 3.f, 4.f + })); + } + + + std::vector biasValues({0.f, 0.f, 0.f}); + if (biasEnabled) + { + biasValues = std::vector({10.f, 20.f, 30.f}); + } + boost::multi_array bias = MakeTensor(biasesDesc, biasValues); + + result = SimpleFullyConnectedTestImpl( + workloadFactory, + inputTensorInfo, outputTensorInfo, + weightsDesc, biasesDesc, + weights, bias, input, + biasEnabled, transposeWeights + ); + + result.outputExpected = MakeTensor(outputTensorInfo, std::vector( + { + 0.5f + 1.0f + 1.5f + 2.0f + 2.5f + biasValues[0], + 2.0f + 4.0f + 6.0f + 8.0f + 10.f + biasValues[1], + 0.5f + 2.0f + 6.0f + 12.f + 20.f + biasValues[2], + + 2.5f + 2.0f + 1.5f + 1.0f + 0.5f + biasValues[0], + 10.0f + 8.0f + 6.0f + 4.0f + 2.f + biasValues[1], + 2.5f + 4.0f + 6.0f + 6.f + 4.f + biasValues[2] + }) + ); + + return result; +} + +LayerTestResult FullyConnectedUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) +{ + constexpr static unsigned int inputWidth = 3u; + constexpr static unsigned int inputHeight = 2u; + constexpr static unsigned int inputChannels = 1u; + + constexpr static unsigned int inputSize = inputWidth * inputHeight * inputChannels; + + constexpr static unsigned int outputChannels = 2u; + + armnn::TensorInfo inputTensorInfo({ 1, inputChannels, inputHeight, inputWidth }, armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(0.1f); + inputTensorInfo.SetQuantizationOffset(63); + + armnn::TensorInfo outputTensorInfo({ 1, outputChannels }, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(5.f); + outputTensorInfo.SetQuantizationOffset(biasEnabled ? 
-50 : 10); + + armnn::TensorInfo weightsDesc({ outputChannels, inputSize }, armnn::DataType::QuantisedAsymm8); + weightsDesc.SetQuantizationScale(0.2f); + weightsDesc.SetQuantizationOffset(93); + + armnn::TensorInfo biasesDesc({ outputChannels }, armnn::DataType::Signed32); + biasesDesc.SetQuantizationScale(inputTensorInfo.GetQuantizationScale() * weightsDesc.GetQuantizationScale()); + biasesDesc.SetQuantizationOffset(0); + + LayerTestResult result(outputTensorInfo); + + auto input = MakeTensor(inputTensorInfo, std::vector{51, 124, 28, + 251, 8, 92}); + + auto weights = MakeTensor(weightsDesc, std::vector{51, 193, 42, 53, 175, 34, + 210, 145, 23, 74, 34, 150}); + + // scale = 0.02 + // offset = 0 + auto bias = MakeTensor(biasesDesc, std::vector{9250, 67500}); + + result = SimpleFullyConnectedTestImpl( + workloadFactory, + inputTensorInfo, outputTensorInfo, + weightsDesc, biasesDesc, + weights, bias, input, + biasEnabled, true + ); + + // Manually calculated. + // Note one of these values has been clamped to 0. + if (biasEnabled) + { + result.outputExpected = MakeTensor(outputTensorInfo, std::vector{0, 242}); + } + else + { + result.outputExpected = MakeTensor(outputTensorInfo, std::vector{0, 32}); + } + + return result; +} + + + +// +// ArmNN variant of the AndroidNN fully_connected_float_large test. +// +// Tests the fully connected layer with large values, optionally transposing weights. +// Note this is templated for consistency, but the nature of this tests makes it unlikely to be useful in Uint8 mode. +// +template +LayerTestResult FullyConnectedLargeTestCommon(armnn::IWorkloadFactory& workloadFactory, + bool transposeWeights, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 1; + + unsigned int outputChannels = 1; + unsigned int outputNum = 1; + + // Define the tensor descriptors. + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo weightsDesc; + armnn::TensorInfo biasesDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels }; + unsigned int weightsShape[] = { inputChannels, outputChannels }; + if (transposeWeights) + { + std::swap(weightsShape[0], weightsShape[1]); + } + + unsigned int biasShape[] = { outputChannels }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType()); + outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::GetDataType()); + weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::GetDataType()); + biasesDesc = armnn::TensorInfo(1, biasShape, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult result(outputTensorInfo); + + boost::multi_array input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f, + }) + ); + + boost::multi_array weights = MakeTensor(weightsDesc, + QuantizedVector(qScale, qOffset, { + 2.0f, 3.0f, 4.0f, 5.0f, 6.0f + }) + ); + + std::vector biasValues({900000.f}); + boost::multi_array bias = MakeTensor(biasesDesc, biasValues); + + result = SimpleFullyConnectedTestImpl( + workloadFactory, + inputTensorInfo, outputTensorInfo, + weightsDesc, biasesDesc, + weights, bias, input, + true, transposeWeights + ); + + result.outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 965432.0f, + }) + ); + + return result; +} diff --git a/src/backends/test/IsLayerSupportedTest.cpp b/src/backends/test/IsLayerSupportedTest.cpp new file mode 100644 index 0000000000..97d3de5e38 --- /dev/null +++ b/src/backends/test/IsLayerSupportedTest.cpp @@ -0,0 +1,239 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include + +#include "test/TensorHelpers.hpp" +#include "LayerTests.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/RefWorkloadFactory.hpp" + +#include +#include +#include +#include + +#include "IsLayerSupportedTestImpl.hpp" +#include "ClContextControlFixture.hpp" + +#include "layers/ConvertFp16ToFp32Layer.hpp" +#include "layers/ConvertFp32ToFp16Layer.hpp" + +BOOST_AUTO_TEST_SUITE(IsLayerSupported) + +BOOST_AUTO_TEST_CASE(IsLayerSupportedLayerTypeMatches) +{ + LayerTypeMatchesTest(); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat16Reference) +{ + armnn::RefWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Reference) +{ + armnn::RefWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Reference) +{ + armnn::RefWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedFp32InputReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float32 data type input"); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedFp16OutputReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float16 data type output"); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedFp16InputReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float16 data type input"); +} + 
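+// All of the convert-layer checks in this suite follow the same pattern: IsConvertLayerSupportedTests
+// is templated on the workload factory, the convert layer type and the input/output data types, and
+// each case only varies which combination it requests. A minimal illustrative sketch of one such
+// instantiation (the template-argument order shown here is assumed for the example, not taken from
+// the helper's declaration):
+//
+//   std::string reason;
+//   bool supported = IsConvertLayerSupportedTests<armnn::RefWorkloadFactory,
+//                                                 armnn::ConvertFp32ToFp16Layer,
+//                                                 armnn::DataType::Float32,   // input
+//                                                 armnn::DataType::Float16>   // output
+//                                                 (reason);
+//   BOOST_CHECK(supported);
+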
+BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedFp32OutputReference) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Layer is not supported with float32 data type output"); +} + +#ifdef ARMCOMPUTENEON_ENABLED +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat16Neon) +{ + armnn::NeonWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat32Neon) +{ + armnn::NeonWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Neon) +{ + armnn::NeonWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp16ToFp32SupportedNeon) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedNeon) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} +#endif //#ifdef ARMCOMPUTENEON_ENABLED. + + +#ifdef ARMCOMPUTECL_ENABLED + +BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat16Cl, ClContextControlFixture) +{ + armnn::ClWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_FIXTURE_TEST_CASE(IsLayerSupportedFloat32Cl, ClContextControlFixture) +{ + armnn::ClWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_FIXTURE_TEST_CASE(IsLayerSupportedUint8Cl, ClContextControlFixture) +{ + armnn::ClWorkloadFactory factory; + IsLayerSupportedTests(&factory); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedFp32InputCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Input should be Float16"); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp16ToFp32SupportedFp16OutputCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Output should be Float32"); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(result); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedFp16InputCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Input should be Float32"); +} + +BOOST_FIXTURE_TEST_CASE(IsConvertFp32ToFp16SupportedFp32OutputCl, ClContextControlFixture) +{ + std::string reasonIfUnsupported; + + bool result = IsConvertLayerSupportedTests(reasonIfUnsupported); + + BOOST_CHECK(!result); + BOOST_CHECK_EQUAL(reasonIfUnsupported, "Output should be Float16"); +} +#endif //#ifdef ARMCOMPUTECL_ENABLED. 
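+
+// Every backend/data-type case above funnels into IsLayerSupportedTests<FactoryType, DataType>(),
+// which visits each entry of the armnn::LayerType enum at compile time via tag dispatch (see
+// IsLayerSupportedTestImpl.hpp). A simplified sketch of that recursion pattern; the helper name
+// Walk is assumed purely for illustration, and the enum is assumed to expose a LastLayer sentinel
+// as the real implementation relies on:
+//
+//   template<armnn::LayerType Type> struct Tag {};
+//
+//   // Termination overload: once the walk reaches the last enum entry, test it and stop.
+//   template<typename Factory, armnn::DataType DataType, armnn::LayerType Type>
+//   bool Walk(Factory* factory, Tag<armnn::LayerType::LastLayer>)
+//   {
+//       return IsLayerSupportedTest<Factory, DataType, Type>(factory, Tag<Type>());
+//   }
+//
+//   // Recursive overload: test the current entry, then advance to NextType(Type).
+//   template<typename Factory, armnn::DataType DataType, armnn::LayerType Type>
+//   bool Walk(Factory* factory, Tag<Type>)
+//   {
+//       bool ok = IsLayerSupportedTest<Factory, DataType, Type>(factory, Tag<Type>());
+//       return ok && Walk<Factory, DataType, NextType(Type)>(factory, Tag<NextType(Type)>());
+//   }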
+ +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/IsLayerSupportedTestImpl.hpp b/src/backends/test/IsLayerSupportedTestImpl.hpp new file mode 100644 index 0000000000..c5389df06e --- /dev/null +++ b/src/backends/test/IsLayerSupportedTestImpl.hpp @@ -0,0 +1,565 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Graph.hpp" + +#include + +namespace +{ +armnn::Graph dummyGraph; + +// Make a dummy TensorInfo object. +template +armnn::TensorInfo MakeDummyTensorInfo() +{ + return armnn::TensorInfo({2,2,2,2}, DataType); +} + + +// Make a dummy WorkloadInfo using a dummy TensorInfo. +template +armnn::WorkloadInfo MakeDummyWorkloadInfo(unsigned int numInputs, unsigned int numOutputs) +{ + armnn::WorkloadInfo info; + for (unsigned int i=0; i < numInputs; i++) + { + info.m_InputTensorInfos.push_back(MakeDummyTensorInfo()); + } + for (unsigned int o=0; o < numOutputs; o++) + { + info.m_OutputTensorInfos.push_back(MakeDummyTensorInfo()); + } + return info; +} + +// Template class to create a dummy layer (2 parameters). +template +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer(DescType(), ""); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + LayerType* m_Layer; +}; + +// Template class to create a dummy layer (1 parameter). +template +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer(""); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + LayerType* m_Layer; +}; + +template<> +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer(armnn::BatchNormalizationDescriptor(), ""); + m_Layer->m_Mean = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_Variance = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_Beta = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_Gamma = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::BatchNormalizationLayer* m_Layer; + +}; + +template<> +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer(""); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::ConstantLayer* m_Layer; +}; + +template<> +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer(armnn::LayerBindingId(), ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::InputLayer* m_Layer; +}; + +template<> +struct DummyLayer +{ + DummyLayer() + { + armnn::OriginsDescriptor desc(2); + m_Layer = dummyGraph.AddLayer(desc, ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::MergerLayer* m_Layer; +}; + +template<> +struct DummyLayer +{ + DummyLayer() + { + m_Layer = dummyGraph.AddLayer(armnn::LayerBindingId(), ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::OutputLayer* m_Layer; +}; + +template<> +struct DummyLayer +{ + DummyLayer() + { + armnn::ViewsDescriptor desc(1); + m_Layer = dummyGraph.AddLayer(desc, ""); + + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::SplitterLayer* m_Layer; +}; + +template +struct DummyConvolutionLayer +{ + DummyConvolutionLayer() + { + typename ConvolutionLayerType::DescriptorType desc; + m_Layer = dummyGraph.AddLayer(desc, ""); + m_Layer->m_Weight = std::make_unique( + 
armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_Bias = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + ~DummyConvolutionLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + ConvolutionLayerType* m_Layer; +}; + +template<> +struct DummyLayer + : public DummyConvolutionLayer +{ +}; + +template<> +struct DummyLayer + : public DummyConvolutionLayer +{ +}; + +template +struct DummyLstmLayer +{ + DummyLstmLayer() + { + typename LstmLayerType::DescriptorType desc; + desc.m_CifgEnabled = false; + + m_Layer = dummyGraph.AddLayer(armnn::LstmDescriptor(), ""); + m_Layer->m_BasicParameters.m_InputToForgetWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_InputToCellWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_InputToOutputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_RecurrentToForgetWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_RecurrentToCellWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_RecurrentToOutputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_ForgetGateBias = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_CellBias = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_BasicParameters.m_OutputGateBias = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + + m_Layer->m_CifgParameters.m_InputToInputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_CifgParameters.m_RecurrentToInputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_CifgParameters.m_CellToInputWeights = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + m_Layer->m_CifgParameters.m_InputGateBias = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + ~DummyLstmLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::LstmLayer* m_Layer; +}; + +template<> +struct DummyLayer + : public DummyLstmLayer +{ +}; + +template<> +struct DummyLayer +{ + DummyLayer() + { + armnn::FullyConnectedLayer::DescriptorType desc; + m_Layer = dummyGraph.AddLayer(desc, ""); + m_Layer->m_Weight = std::make_unique( + armnn::TensorInfo(armnn::TensorShape({1,1,1,1}), armnn::DataType::Float32)); + } + ~DummyLayer() + { + dummyGraph.EraseLayer(m_Layer); + } + armnn::FullyConnectedLayer* m_Layer; +}; + +// Tag for giving LayerType entries a unique strong type each. 
+template +struct Tag{}; + +#define DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, descType) \ +template \ +struct LayerTypePolicy \ +{ \ + using Type = armnn::name##Layer; \ + using Desc = descType; \ + using QueueDesc = armnn::name##QueueDescriptor; \ + constexpr static const char* NameStr = #name; \ + \ + static std::unique_ptr MakeDummyWorkload(armnn::IWorkloadFactory *factory, \ + unsigned int nIn, unsigned int nOut) \ + { \ + QueueDesc desc; \ + armnn::WorkloadInfo info = MakeDummyWorkloadInfo(nIn, nOut); \ + return factory->Create##name(desc, info); \ + } \ +}; + +// Define a layer policy specialization for use with the IsLayerSupported tests. +// Use this version for layers whose constructor takes 1 parameter(name). +#define DECLARE_LAYER_POLICY_1_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, void) + +// Define a layer policy specialization for use with the IsLayerSupported tests. +// Use this version for layers whose constructor takes 2 parameters(descriptor and name). +#define DECLARE_LAYER_POLICY_2_PARAM(name) DECLARE_LAYER_POLICY_CUSTOM_PARAM(name, armnn::name##Descriptor) + +// Layer policy template. +template +struct LayerTypePolicy; + +// Every entry in the armnn::LayerType enum must be accounted for below. +DECLARE_LAYER_POLICY_2_PARAM(Activation) + +DECLARE_LAYER_POLICY_1_PARAM(Addition) + +DECLARE_LAYER_POLICY_2_PARAM(BatchNormalization) + +DECLARE_LAYER_POLICY_1_PARAM(Constant) + +DECLARE_LAYER_POLICY_1_PARAM(ConvertFp16ToFp32) + +DECLARE_LAYER_POLICY_1_PARAM(ConvertFp32ToFp16) + +DECLARE_LAYER_POLICY_2_PARAM(Convolution2d) + +DECLARE_LAYER_POLICY_1_PARAM(MemCopy) + +DECLARE_LAYER_POLICY_2_PARAM(DepthwiseConvolution2d) + +DECLARE_LAYER_POLICY_2_PARAM(FakeQuantization) + +DECLARE_LAYER_POLICY_1_PARAM(Floor) + +DECLARE_LAYER_POLICY_2_PARAM(FullyConnected) + +DECLARE_LAYER_POLICY_CUSTOM_PARAM(Input, armnn::LayerBindingId) + +DECLARE_LAYER_POLICY_1_PARAM(L2Normalization) + +DECLARE_LAYER_POLICY_2_PARAM(Lstm) + +DECLARE_LAYER_POLICY_2_PARAM(Mean) + +DECLARE_LAYER_POLICY_2_PARAM(Merger) + +DECLARE_LAYER_POLICY_1_PARAM(Multiplication) + +DECLARE_LAYER_POLICY_2_PARAM(Normalization) + +DECLARE_LAYER_POLICY_CUSTOM_PARAM(Output, armnn::LayerBindingId) + +DECLARE_LAYER_POLICY_2_PARAM(Permute) + +DECLARE_LAYER_POLICY_2_PARAM(Pooling2d) + +DECLARE_LAYER_POLICY_1_PARAM(Division) + +DECLARE_LAYER_POLICY_2_PARAM(ResizeBilinear) + +DECLARE_LAYER_POLICY_2_PARAM(Reshape) + +DECLARE_LAYER_POLICY_2_PARAM(Softmax) + +DECLARE_LAYER_POLICY_2_PARAM(Splitter) + +DECLARE_LAYER_POLICY_1_PARAM(Subtraction) + + +// Generic implementation to get the number of input slots for a given layer type; +template +unsigned int GetNumInputs(const armnn::Layer& layer) +{ + return layer.GetNumInputSlots(); +} + +// Generic implementation to get the number of output slots for a given layer type; +template +unsigned int GetNumOutputs(const armnn::Layer& layer) +{ + return layer.GetNumOutputSlots(); +} + +template<> +unsigned int GetNumInputs(const armnn::Layer& layer) +{ + boost::ignore_unused(layer); + return 2; +} + +// Tests that the IsLayerSupported() function returns the correct value. +// We determined the correct value by *trying* to create the relevant workload and seeing if it matches what we expect. +// Returns true if expectations are met, otherwise returns false. 
+template +bool IsLayerSupportedTest(FactoryType *factory, Tag) +{ + using LayerPolicy = LayerTypePolicy; + using LayerType = typename LayerPolicy::Type; + using LayerDesc = typename LayerPolicy::Desc; + DummyLayer layer; + + unsigned int numIn = GetNumInputs(*layer.m_Layer); + unsigned int numOut = GetNumOutputs(*layer.m_Layer); + + // Make another dummy layer just to make IsLayerSupported have valid inputs. + DummyLayer previousLayer; + // Set output of the previous layer to a dummy tensor. + armnn::TensorInfo output = MakeDummyTensorInfo(); + previousLayer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); + // Connect all outputs of the previous layer to inputs of tested layer. + for (unsigned int i = 0; i < numIn; i++) + { + armnn::IOutputSlot& previousLayerOutputSlot = previousLayer.m_Layer->GetOutputSlot(0); + armnn::IInputSlot& layerInputSlot = layer.m_Layer->GetInputSlot(i); + previousLayerOutputSlot.Connect(layerInputSlot); + } + // Set outputs of tested layer to a dummy tensor. + for (unsigned int i = 0; i < numOut; i++) + { + layer.m_Layer->GetOutputSlot(0).SetTensorInfo(output); + } + + std::string layerName = LayerPolicy::NameStr; + std::string reasonIfUnsupported; + if (FactoryType::IsLayerSupported(*layer.m_Layer, DataType, reasonIfUnsupported)) + { + std::string errorMsg = " layer expected support but found none."; + try + { + bool retVal = LayerPolicy::MakeDummyWorkload(factory, numIn, numOut).get() != nullptr; + // hacky way (it has to be replaced): for Lstm, we only support F32 right now +// BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); + return retVal; + } + catch(const armnn::InvalidArgumentException& e) + { + boost::ignore_unused(e); + // This is ok since we throw InvalidArgumentException when creating the dummy workload. + return true; + } + catch(const std::exception& e) + { + errorMsg = e.what(); + BOOST_TEST_ERROR(layerName << ": " << errorMsg); + return false; + } + catch(...) + { + errorMsg = "Unexpected error while testing support for "; + BOOST_TEST_ERROR(errorMsg << layerName); + return false; + } + } + else + { + std::string errorMsg = "layer expected no support (giving reason: " + reasonIfUnsupported + ") but found some."; + try + { + bool retVal = LayerPolicy::MakeDummyWorkload(factory, numIn, numOut).get() == nullptr; + BOOST_CHECK_MESSAGE(retVal, layerName << errorMsg); + return retVal; + } + // These two exceptions are ok: For workloads that are partially supported, attempting to instantiate them + // using parameters that make IsLayerSupported() return false should throw an + // InvalidArgumentException or UnimplementedException. + catch(const armnn::InvalidArgumentException& e) + { + boost::ignore_unused(e); + return true; + } + catch(const armnn::UnimplementedException& e) + { + boost::ignore_unused(e); + return true; + } + catch(const std::exception& e) + { + errorMsg = e.what(); + BOOST_TEST_ERROR(layerName << ": " << errorMsg); + return false; + } + catch(...) + { + errorMsg = "Unexpected error while testing support for "; + BOOST_TEST_ERROR(errorMsg << layerName); + return false; + } + } +} + +// Helper function to compute the next type in the LayerType enum. +constexpr armnn::LayerType NextType(armnn::LayerType type) +{ + return static_cast(static_cast(type)+1); +} + +// Termination function for determining the end of the LayerType enumeration. 
+template +bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag) +{ + return IsLayerSupportedTest(factory, Tag()); +}; + +// Recursive function to test and enter in the LayerType enum and then iterate on the next entry. +template +bool IsLayerSupportedTestsImpl(FactoryType *factory, Tag) +{ + bool v = IsLayerSupportedTest(factory, Tag()); + + return v && + IsLayerSupportedTestsImpl + (factory, Tag()); +}; + +// Helper function to pass through to the test framework. +template +bool IsLayerSupportedTests(FactoryType *factory) +{ + return IsLayerSupportedTestsImpl(factory, Tag()); +}; + +template +bool TestLayerTypeMatches() +{ + using LayerPolicy = LayerTypePolicy; + using LayerType = typename LayerPolicy::Type; + using LayerDesc = typename LayerPolicy::Desc; + DummyLayer layer; + + std::stringstream ss; + ss << LayerPolicy::NameStr << " layer type mismatches expected layer type value."; + bool v = Type == layer.m_Layer->GetType(); + BOOST_CHECK_MESSAGE(v, ss.str()); + return v; +}; + +template +bool LayerTypeMatchesTestImpl(Tag) +{ + return TestLayerTypeMatches(); +}; + +template +bool LayerTypeMatchesTestImpl(Tag) +{ + return TestLayerTypeMatches() && + LayerTypeMatchesTestImpl(Tag()); +}; + +bool LayerTypeMatchesTest() +{ + return LayerTypeMatchesTestImpl(Tag()); +}; + +template +bool IsConvertLayerSupportedTests(std::string& reasonIfUnsupported) +{ + armnn::Graph graph; + LayerType* const layer = graph.AddLayer("LayerName"); + + armnn::Layer* const input = graph.AddLayer(0, "input"); + armnn::Layer* const output = graph.AddLayer(0, "output"); + + armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, InputDataType); + armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, OutputDataType); + + input->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + input->GetOutputHandler(0).SetTensorInfo(inputTensorInfo); + layer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + layer->GetOutputHandler(0).SetTensorInfo(outputTensorInfo); + + bool result = FactoryType::IsLayerSupported(*layer, InputDataType, reasonIfUnsupported); + + return result; +}; + +} //namespace diff --git a/src/backends/test/LayerReleaseConstantDataTest.cpp b/src/backends/test/LayerReleaseConstantDataTest.cpp new file mode 100644 index 0000000000..7566c72352 --- /dev/null +++ b/src/backends/test/LayerReleaseConstantDataTest.cpp @@ -0,0 +1,212 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include + +#include "backends/WorkloadData.hpp" +#include "Graph.hpp" + +#include + +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClWorkloadFactory.hpp" + +using namespace armnn; +using namespace std; + +// connects two layers +void Connect(Layer* from, Layer* to, const TensorInfo& tensorInfo, unsigned int fromIndex = 0, unsigned int toIndex = 0) +{ + from->GetOutputSlot(fromIndex).Connect(to->GetInputSlot(toIndex)); + from->GetOutputHandler(fromIndex).SetTensorInfo(tensorInfo); +} + +///////////////////////////////////////////////////////////////////////////////////////////// +// The following test are created specifically to test ReleaseConstantData() method in the Layer +// They build very simple graphs including the layer will be checked. +// Checks weights and biases before the method called and after. 
+///////////////////////////////////////////////////////////////////////////////////////////// + +BOOST_AUTO_TEST_SUITE(LayerReleaseConstantDataTest) + +BOOST_AUTO_TEST_CASE(ReleaseBatchNormalizationLayerConstantDataTest) +{ + Graph graph; + ClWorkloadFactory factory; + + // create the layer we're testing + BatchNormalizationDescriptor layerDesc; + layerDesc.m_Eps = 0.05f; + BatchNormalizationLayer* const layer = graph.AddLayer(layerDesc, "layer"); + + armnn::TensorInfo weightInfo({3}, armnn::DataType::Float32); + layer->m_Mean = std::make_unique(weightInfo); + layer->m_Variance = std::make_unique(weightInfo); + layer->m_Beta = std::make_unique(weightInfo); + layer->m_Gamma = std::make_unique(weightInfo); + layer->m_Mean->Allocate(); + layer->m_Variance->Allocate(); + layer->m_Beta->Allocate(); + layer->m_Gamma->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // connect up + armnn::TensorInfo tensorInfo({2, 3, 1, 1}, armnn::DataType::Float32); + Connect(input, layer, tensorInfo); + Connect(layer, output, tensorInfo); + + // check the constants that they are not NULL + BOOST_CHECK(layer->m_Mean != nullptr); + BOOST_CHECK(layer->m_Variance != nullptr); + BOOST_CHECK(layer->m_Beta != nullptr); + BOOST_CHECK(layer->m_Gamma != nullptr); + + // free up the constants.. + layer->ReleaseConstantData(); + + // check the constants that they are NULL now + BOOST_CHECK(layer->m_Mean == nullptr); + BOOST_CHECK(layer->m_Variance == nullptr); + BOOST_CHECK(layer->m_Beta == nullptr); + BOOST_CHECK(layer->m_Gamma == nullptr); + + } + + + BOOST_AUTO_TEST_CASE(ReleaseConvolution2dLayerConstantDataTest) + { + Graph graph; + ClWorkloadFactory factory; + + // create the layer we're testing + Convolution2dDescriptor layerDesc; + layerDesc.m_PadLeft = 3; + layerDesc.m_PadRight = 3; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 1; + layerDesc.m_StrideX = 2; + layerDesc.m_StrideY = 4; + layerDesc.m_BiasEnabled = true; + + Convolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); + + layer->m_Weight = std::make_unique(TensorInfo({2, 3, 5, 3}, + armnn::DataType::Float32)); + layer->m_Bias = std::make_unique + (TensorInfo({2}, GetBiasDataType(armnn::DataType::Float32))); + + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32)); + Connect(layer, output, TensorInfo({2, 2, 2, 10}, armnn::DataType::Float32)); + + // check the constants that they are not NULL + BOOST_CHECK(layer->m_Weight != nullptr); + BOOST_CHECK(layer->m_Bias != nullptr); + + // free up the constants.. 
+ layer->ReleaseConstantData(); + + // check the constants that they are NULL now + BOOST_CHECK(layer->m_Weight == nullptr); + BOOST_CHECK(layer->m_Bias == nullptr); +} + +BOOST_AUTO_TEST_CASE(ReleaseDepthwiseConvolution2dLayerConstantDataTest) +{ + Graph graph; + ClWorkloadFactory factory; + + // create the layer we're testing + DepthwiseConvolution2dDescriptor layerDesc; + layerDesc.m_PadLeft = 3; + layerDesc.m_PadRight = 3; + layerDesc.m_PadTop = 1; + layerDesc.m_PadBottom = 1; + layerDesc.m_StrideX = 2; + layerDesc.m_StrideY = 4; + layerDesc.m_BiasEnabled = true; + + DepthwiseConvolution2dLayer* const layer = graph.AddLayer(layerDesc, "layer"); + + layer->m_Weight = std::make_unique(TensorInfo({3, 3, 5, 3}, DataType::Float32)); + layer->m_Bias = std::make_unique(TensorInfo({9}, DataType::Float32)); + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32)); + Connect(layer, output, TensorInfo({2, 9, 2, 10}, armnn::DataType::Float32)); + + // check the constants that they are not NULL + BOOST_CHECK(layer->m_Weight != nullptr); + BOOST_CHECK(layer->m_Bias != nullptr); + + // free up the constants.. + layer->ReleaseConstantData(); + + // check the constants that they are NULL now + BOOST_CHECK(layer->m_Weight == nullptr); + BOOST_CHECK(layer->m_Bias == nullptr); +} + +BOOST_AUTO_TEST_CASE(ReleaseFullyConnectedLayerConstantDataTest) +{ + Graph graph; + ClWorkloadFactory factory; + + // create the layer we're testing + FullyConnectedDescriptor layerDesc; + layerDesc.m_BiasEnabled = true; + layerDesc.m_TransposeWeightMatrix = true; + + FullyConnectedLayer* const layer = graph.AddLayer(layerDesc, "layer"); + + float inputsQScale = 1.0f; + float outputQScale = 2.0f; + + layer->m_Weight = std::make_unique(TensorInfo({7, 20}, + DataType::QuantisedAsymm8, inputsQScale, 0)); + layer->m_Bias = std::make_unique(TensorInfo({7}, + GetBiasDataType(DataType::QuantisedAsymm8), inputsQScale)); + layer->m_Weight->Allocate(); + layer->m_Bias->Allocate(); + + // create extra layers + Layer* const input = graph.AddLayer(0, "input"); + Layer* const output = graph.AddLayer(0, "output"); + + // connect up + Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType::QuantisedAsymm8, inputsQScale)); + Connect(layer, output, TensorInfo({3, 7}, DataType::QuantisedAsymm8, outputQScale)); + + // check the constants that they are not NULL + BOOST_CHECK(layer->m_Weight != nullptr); + BOOST_CHECK(layer->m_Bias != nullptr); + + // free up the constants.. + layer->ReleaseConstantData(); + + // check the constants that they are NULL now + BOOST_CHECK(layer->m_Weight == nullptr); + BOOST_CHECK(layer->m_Bias == nullptr); +} + +BOOST_AUTO_TEST_SUITE_END() + diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp new file mode 100644 index 0000000000..4dcc36fdb2 --- /dev/null +++ b/src/backends/test/LayerTests.cpp @@ -0,0 +1,4750 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include "LayerTests.hpp" + +#include "test/TensorHelpers.hpp" +#include "TensorCopyUtils.hpp" +#include "Permute.hpp" + +#include +#include + +#include "armnn/LayerSupport.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#endif + +#include +#include + +#include "WorkloadTestUtils.hpp" +#include "Conv2dTestImpl.hpp" +#include "BatchNormTestImpl.hpp" +#include "ActivationTestImpl.hpp" +#include "Pooling2dTestImpl.hpp" +#include "ReshapeTestImpl.hpp" +#include "FullyConnectedTestImpl.hpp" +#include "SplitterTestImpl.hpp" +#include "SoftmaxTestImpl.hpp" +#include "NormTestImpl.hpp" +#include "PermuteTestImpl.hpp" +#include "LstmTestImpl.hpp" +#include "ConvertFp16ToFp32TestImpl.hpp" +#include "ConvertFp32ToFp16TestImpl.hpp" + +// 3-channel 16x8 image used as common input data for a number of Conv2d tests. +static std::vector ConvInput3x8x16({ + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 +}); + +// 2-channel bias used by a number of Conv2d tests. +static std::vector Bias2({0, 2}); + +// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled. +template +boost::multi_array GetBias2(bool biasEnabled, float qScale, int32_t qOffset) +{ + if(biasEnabled) + { + armnn::TensorInfo biasDesc({static_cast(Bias2.size())}, armnn::GetDataType()); + boost::multi_array bias = MakeTensor(biasDesc, QuantizedVector(qScale, qOffset, Bias2)); + return bias; + } + else + { + return boost::multi_array(); + } +} + +template +LayerTestResult SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + // Use common single-batch 3-channel 16x8 image. 
+ armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, QuantizedVector(qScale, qOffset, ConvInput3x8x16)); + + // Use a 2-element batch with 3-channel 3x5 kernels. + armnn::TensorInfo kernelDesc({2, 3, 5, 3}, armnn::GetDataType()); + boost::multi_array kernel = MakeTensor(kernelDesc, std::vector( + QuantizedVector(qScale, qOffset, { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // Expected output is 2 batch elements of a 1-channel 14x4 image. + armnn::TensorInfo outputDesc({1, 2, 4, 14}, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + QuantizedVector(qScale, qOffset, { + -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, + -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, + -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, + -23.5f, -23.5f, -23.5f, + + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl(workloadFactory, + input, + kernel, + GetBias2::Type>(biasEnabled, qScale, qOffset), + expectedOutput, + qScale, + qOffset); +} + +template +LayerTestResult SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path. + + // Use common single-batch 3-channel 16x8 image. + armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, QuantizedVector(qScale, qOffset, ConvInput3x8x16)); + + // Use a 2-element batch of 3-channel 3x3 kernels. + armnn::TensorInfo kernelDesc({2, 3, 3, 3}, armnn::GetDataType()); + boost::multi_array kernel = MakeTensor(kernelDesc, std::vector( + QuantizedVector(qScale, qOffset, { + 1, 1, 1, + 1, -1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 2, 2, 2, + 2, 2, 2, + 2, 2, 2, + + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 1, 1, 1, + 1, 1, 1, + 1, 1, 1, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0 + }))); + + // Expected output is 1 batch of a 2-channel 14x6 image. 
+ armnn::TensorInfo outputDesc({1, 2, 6, 14}, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + QuantizedVector(qScale, qOffset, { + -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, + -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f, + + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl(workloadFactory, + input, + kernel, + GetBias2::Type>(biasEnabled, qScale, qOffset), + expectedOutput, + qScale, + qOffset); +} + +LayerTestResult SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x5TestCommon(workloadFactory, 0.f, 0, biasEnabled); +} + +LayerTestResult SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x5TestCommon(workloadFactory, 0.5f, 50, biasEnabled); +} + +LayerTestResult SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x3TestCommon(workloadFactory, 0.f, 0, biasEnabled); +} + +LayerTestResult SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return SimpleConvolution2d3x3TestCommon(workloadFactory, 0.5f, 50, biasEnabled); +} + +template +LayerTestResult Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon( + armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + // Use a single-batch 1-channel 3x3 image as input. + armnn::TensorInfo inputDesc({1, 1, 3, 3}, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + QuantizedVector(qScale, qOffset, { + 11,21,31, + 12,22,32, + 13,23,33 + }))); + + // Use 1 batch of a 1-channel 2x2 kernel. + armnn::TensorInfo kernelDesc({1, 1, 2, 2}, armnn::GetDataType()); + boost::multi_array kernel = MakeTensor(kernelDesc, std::vector( + QuantizedVector(qScale, qOffset, { + -11,-21, + -12,-22, + }))); + +// Expected output is 1 batch of a 1-channel 6x8 image. +// Manually calculated like this: +//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] +//[-11*0 -21*0 -12*0 -22*11 ; -11*0 -21*0 -12*11 -22*21 ; -11*0 -21*0 -12*21 -22*31 ; -11*0 -21*0 -12*31 -22*0 ..] +//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..] +//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..] +//[-11*0 -21*13 -12*0 -22*0 ; -11*13 -21*23 -12*0 -22*0 ; -11*23 -21*33 -12*0 -22*0 ; -11*33 -21*0 -12*0 -22*0 ..] +//[-11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ; -11*0 -21*0 -12*0 -22*0 ..] +//[..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ; ..... ..... ..... ..... ..] 
+ armnn::TensorInfo outputDesc({1, 1, 8, 6}, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + QuantizedVector(qScale, qOffset, { + 0, 0, 0, 0, 0, 0, + -242, -594, -934, -372, 0, 0, + -495, -1190, -1850, -725, 0, 0, + -538, -1256, -1916, -748, 0, 0, + -273, -626, -946, -363, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0 + }))); + + return SimpleConvolution2dTestImpl(workloadFactory, + input, + kernel, + GetBias2::Type>(false, qScale, qOffset), + expectedOutput, + qScale, + qOffset, + 1, // Padding left. + 2, // Padding top. + 3, // Padding right. + 4); // Padding bottom. +} + +template +LayerTestResult SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + // Use a single-batch 1-channel 5x5 image as input. + armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + QuantizedVector(qScale, qOffset, { + 11,21,31,41,51, + 12,22,32,42,52, + 13,23,33,43,53, + 14,24,34,44,54, + 15,25,35,45,55, + }))); + + // Use 1 batch of a 1-channel 4x4 kernel. + armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, armnn::GetDataType()); + boost::multi_array kernel = MakeTensor(kernelDesc, std::vector( + QuantizedVector(qScale, qOffset, { + -11,-21,-31,-41, + -12,-22,-32,-42, + -13,-23,-33,-43, + -14,-24,-34,-44, + }))); + + // Expected output is 1 batch of a 1-channel 5x5 image. + armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, armnn::GetDataType()); + std::vector myVec(outputDesc.GetNumElements(), 0); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + QuantizedVector(qScale, qOffset, { + -7140, -10580, -13940, -9300, -5230, + -9590, -14120, -18520, -12290, -6860, + -9980, -14560, -18960, -12560, -7000, + -7518, -10904, -14144, -9318, -5152, + -5032, -7256, -9376, -6142, -3368, + }))); + + return SimpleConvolution2dTestImpl(workloadFactory, + input, + kernel, + GetBias2::Type>(false, qScale, qOffset), + expectedOutput, + qScale, + qOffset, + 1, // Padding left. + 1, // Padding top. + 2, // Padding right. + 2); // Padding bottom. +} + +template +LayerTestResult DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset, + bool biasEnabled) +{ + // Use a single-batch 2-channel 5x5 image as input. + armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType()); + auto input = MakeTensor(inputTensorInfo, std::vector( + QuantizedVector(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), { + 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, + + 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49 + }))); + + // Use a depth multiplier of 1 on a 2-channel 4x4 kernel. + armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType()); + auto kernel = MakeTensor(kernelTensorInfo, std::vector( + QuantizedVector(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), { + 32, 31, 30, 29, + 28, 27, 26, 25, + 24, 23, 22, 21, + 20, 19, 18, 17, + + 16, 15, 14, 13, + 12, 11, 10, 9, + 8, 7, 6, 5, + 4, 3, 2, 1 + }))); + + // Expected output is 1 batch of a 2-channel 5x5 image. + // Calculated using the python tensorflow library with strideX=1, strideY=1. 
+ armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputTensorInfo, std::vector( + QuantizedVector(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), { + 1062, 1580, 1850, 1530, 1117, + 2140, 3108, 3500, 2842, 2042, + 3580, 5068, 5460, 4342, 3062, + 3618, 5072, 5390, 4248, 2971, + 3074, 4282, 4510, 3533, 2457, + 1550, 2284, 2362, 1955, 1428, + 2910, 4206, 4342, 3528, 2536, + 3390, 4886, 5022, 4068, 2916, + 3566, 5056, 5182, 4133, 2922, + 3100, 4352, 4452, 3517, 2465 + }))); + + return DepthwiseConvolution2dAsymmetricTestImpl(workloadFactory, + input, + kernel, + GetBias2::Type>(biasEnabled, qScale, qOffset), + expectedOutput, + qScale, + qOffset, + 1, // Padding left. + 1, // Padding top. + 2, // Padding right. + 2, // Padding bottom. + 1, // strideX + 1); // strideY +} + +LayerTestResult +Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(workloadFactory, 0.0f, 0); +} + +LayerTestResult Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleConvolution2dAsymmetricPaddingTestCommon(workloadFactory, 0.0f, 0); +} + +LayerTestResult DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dTestImpl(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dDepthMul1TestImpl(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dAsymmetricTestCommon(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dTestImpl(workloadFactory, 0.5f, 50, biasEnabled); +} + +LayerTestResult DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled) +{ + return DepthwiseConvolution2dDepthMul1TestImpl(workloadFactory, 0.5f, 50, biasEnabled); +} + +LayerTestResult Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) +{ + return Convolution1dTestImpl(workloadFactory, 0.0f, 0, biasEnabled); +} + +LayerTestResult Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled) +{ + return Convolution1dTestImpl(workloadFactory, 0.1f, 128, biasEnabled); +} + +LayerTestResult CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + return CompareConvolution2dTestImpl(workloadFactory, refWorkloadFactory); +} + +template +LayerTestResult CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + return CompareDepthwiseConvolution2dTestImpl(workloadFactory, refWorkloadFactory); +} + +template LayerTestResult CompareDepthwiseConvolution2dTest( + armnn::IWorkloadFactory&, armnn::IWorkloadFactory&); +template LayerTestResult CompareDepthwiseConvolution2dTest( + armnn::IWorkloadFactory&, armnn::IWorkloadFactory&); + +LayerTestResult SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory) +{ + auto normMethod = 
armnn::NormalizationAlgorithmMethod::LocalBrightness; + auto normChannel = armnn::NormalizationAlgorithmChannel::Across; + return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod); +} + +LayerTestResult SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory) +{ + auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; + auto normChannel = armnn::NormalizationAlgorithmChannel::Within; + return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod); +} + +LayerTestResult SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta) +{ + return SimpleSoftmaxTestImpl(workloadFactory, beta); +} + +LayerTestResult SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta) +{ + return SimpleSoftmaxTestImpl(workloadFactory, beta); +} + +LayerTestResult CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod) +{ + return CompareNormalizationTestImpl(workloadFactory, refWorkloadFactory, normChannel, normMethod); +} + +LayerTestResult CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta) +{ + return CompareSoftmaxTestImpl(workloadFactory, refWorkloadFactory, beta); +} + +LayerTestResult CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta) +{ + return CompareSoftmaxTestImpl(workloadFactory, refWorkloadFactory, beta); +} + +std::vector> SplitterTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SplitterTestCommon(workloadFactory); +} + +std::vector> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SplitterTestCommon(workloadFactory, 1.0f, 0); +} + +LayerTestResult CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory) +{ + return CopyViaSplitterTestImpl(workloadFactory, 0.0f, 0); +} + +LayerTestResult CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return CopyViaSplitterTestImpl(workloadFactory, 1.0f, 0); +} + +LayerTestResult LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest( + armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputDesc({ 2, 2 }, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + { 2., 3., 3., 4. 
})); + + armnn::TensorInfo outputDesc({ 2, 4 }, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f, + -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f})); + return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput); +} + +LayerTestResult LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest( + armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputDesc({ 2, 5 }, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f, + 0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f})); + + armnn::TensorInfo outputDesc({ 2, 16 }, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + {-0.00396806f, 0.029352f, -0.00279226f, 0.0159977f, -0.00835576f, + -0.0211779f, 0.0283512f, -0.0114597f, 0.00907307f, -0.0244004f, + -0.0152191f, -0.0259063f, 0.00914318f, 0.00415118f, 0.017147f, + 0.0134203f, -0.013869f, 0.0287268f, -0.00334693f, 0.00733398f, -0.0287926f, + -0.0186926f, 0.0193662f, -0.0115437f, 0.00422612f, -0.0345232f, + 0.00223253f, -0.00957321f, 0.0210624f, 0.013331f, 0.0150954f, + 0.02168f})); + return LstmLayerFloat32NoCifgWithPeepholeWithProjectionTestImpl(workloadFactory, input, expectedOutput); +} + +LayerTestResult LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputDesc({2, 2}, armnn::GetDataType()); + boost::multi_array input = MakeTensor(inputDesc, std::vector( + {2., 3., 3., 4.})); + + + armnn::TensorInfo outputDesc({2, 4}, armnn::GetDataType()); + boost::multi_array expectedOutput = MakeTensor(outputDesc, std::vector( + {{-0.02973187f, 0.1229473f, 0.20885126f, -0.15358765f, + -0.0185422f, 0.11281417f, 0.24466537f, -0.1826292f}})); + + return LstmNoCifgNoPeepholeNoProjectionTestImpl(workloadFactory, input, expectedOutput); +} + +LayerTestResult MergerTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int outputWidth = 3; + unsigned int outputHeight = 6; + unsigned int outputChannels = 3; + + unsigned int inputWidth1 = 3; + unsigned int inputHeight1 = 6; + unsigned int inputChannels1 = 2; + + unsigned int inputWidth2 = 3; + unsigned int inputHeight2 = 6; + unsigned int inputChannels2 = 1; + + // Define the tensor descriptors. 
+ armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32); + armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32); + + LayerTestResult ret(outputTensorInfo); + + ret.outputExpected = MakeTensor(outputTensorInfo, std::vector( + { + 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, + + 19.0f, 20.0f, 21.0f, + 22.0f, 23.0f, 24.0f, + 25.0f, 26.0f, 27.0f, + 28.0f, 29.0f, 30.0f, + 31.0f, 32.0f, 33.0f, + 34.0f, 35.0f, 36.0f, + + 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, + 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, + 49.0f, 50.0f, 51.0f, + 52.0f, 53.0f, 54.0f, + }) + ); + + auto input1 = MakeTensor(inputTensorInfo1, std::vector( + { + 1.0f, 2.0f, 3.0f, + 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, + + 19.0f, 20.0f, 21.0f, + 22.0f, 23.0f, 24.0f, + 25.0f, 26.0f, 27.0f, + 28.0f, 29.0f, 30.0f, + 31.0f, 32.0f, 33.0f, + 34.0f, 35.0f, 36.0f, + }) + ); + + auto input2 = MakeTensor(inputTensorInfo2, std::vector( + { + 37.0f, 38.0f, 39.0f, + 40.0f, 41.0f, 42.0f, + 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, + 49.0f, 50.0f, 51.0f, + 52.0f, 53.0f, 54.0f, + }) + ); + + std::vector wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of input[0]. + armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); + + std::vector wOrigin2 = {2, 0, 0}; //Extent of the window is defined by size of input[1]. + armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); + + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr inputHandle1 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo1); + + std::unique_ptr inputHandle2 = + subTensorsSupported ? 
+ workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo2); + + armnn::MergerQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_ViewOrigins.push_back(window1); + data.m_ViewOrigins.push_back(window2); + + std::unique_ptr workload = workloadFactory.CreateMerger(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult AdditionTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int batchSize = 2; + unsigned int channels = 2; + unsigned int height = 2; + unsigned int width = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, channels, height, width}; + + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + + auto input1 = MakeTensor(inputTensorInfo1, std::vector( + { + 0.0f, 2.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + + 1.0f, 2.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + + 0.0f, 2.0f, 1.0f, + 4.2f, 1.0f, 2.0f, + + 0.0f, 0.0f, 1.0f, + 0.2f, 1.0f, 2.0f, + })); + + auto input2 = MakeTensor(inputTensorInfo2, std::vector( + { + 1.0f, 2.0f, 1.0f, + 0.0f, 1.0f, 2.0f, + + 1.0f, 2.0f, -2.0f, + 0.2f, 1.0f, 2.0f, + + 0.0f, 2.0f, 1.0f, + 4.2f, 0.0f, -3.0f, + + 0.0f, 0.0f, 1.0f, + 0.7f, 1.0f, 5.0f, + })); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, std::vector( + { + 1.0f, 4.0f, 2.0f, + 0.2f, 2.0f, 4.0f, + + 2.0f, 4.0f, -1.0f, + 0.4f, 2.0f, 4.0f, + + 0.0f, 4.0f, 2.0f, + 8.4f, 1.0f, -1.0f, + + 0.0f, 0.0f, 2.0f, + 0.9f, 2.0f, 7.0f, + })); + + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template +LayerTestResult AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 1}, armnn::GetDataType()); + 
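AdditionBroadcastTestImpl adds a {1, 3, 2, 1} tensor to a {1, 1, 2, 3} tensor and expects a {1, 3, 2, 3} result: wherever one operand has extent 1 in a dimension, its single value is reused across the other operand's extent. A minimal standalone sketch of that index rule, using only the standard library (broadcastAdd4d is an illustrative name, not an ArmNN API):

#include <algorithm>
#include <array>
#include <cstddef>
#include <vector>

std::vector<float> broadcastAdd4d(const std::vector<float>& in0, std::array<std::size_t, 4> s0,
                                  const std::vector<float>& in1, std::array<std::size_t, 4> s1)
{
    // Output extent per dimension: the larger of the two (the smaller one is expected to be 1).
    std::array<std::size_t, 4> outShape;
    for (std::size_t d = 0; d < 4; ++d)
    {
        outShape[d] = std::max(s0[d], s1[d]);
    }
    // Flattened NCHW index; a size-1 dimension always resolves to index 0, which is the broadcast.
    auto index = [](const std::array<std::size_t, 4>& s, std::size_t n, std::size_t c,
                    std::size_t h, std::size_t w)
    {
        return ((n % s[0]) * s[1] + (c % s[1])) * s[2] * s[3] + (h % s[2]) * s[3] + (w % s[3]);
    };
    std::vector<float> out(outShape[0] * outShape[1] * outShape[2] * outShape[3]);
    std::size_t i = 0;
    for (std::size_t n = 0; n < outShape[0]; ++n)
        for (std::size_t c = 0; c < outShape[1]; ++c)
            for (std::size_t h = 0; h < outShape[2]; ++h)
                for (std::size_t w = 0; w < outShape[3]; ++w)
                {
                    out[i++] = in0[index(s0, n, c, h, w)] + in1[index(s1, n, c, h, w)];
                }
    return out;
}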
armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 2, 3}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType()); + + if (armnn::IsQuantizedType()) + { + inputTensorInfo1.SetQuantizationScale(qScale); + inputTensorInfo1.SetQuantizationOffset(qOffset); + inputTensorInfo2.SetQuantizationScale(qScale); + inputTensorInfo2.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input1 = MakeTensor(inputTensorInfo1, QuantizedVector(qScale, qOffset, + { + 0.0f, + 1.0f, + + 2.0f, + 3.0f, + + 4.0f, + 5.0f, + })); + + auto input2 = MakeTensor(inputTensorInfo2, QuantizedVector(qScale, qOffset, + { + 0.5f, 1.5f, 2.5f, + 3.5f, 4.5f, 5.5f, + })); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, + { + 0.5f, 1.5f, 2.5f, + 4.5f, 5.5f, 6.5f, + + 2.5f, 3.5f, 4.5f, + 6.5f, 7.5f, 8.5f, + + 4.5f, 5.5f, 6.5f, + 8.5f, 9.5f, 10.5f, + })); + + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +template +LayerTestResult AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType()); + armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 1, 1}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType()); + + if (armnn::IsQuantizedType()) + { + inputTensorInfo1.SetQuantizationScale(qScale); + inputTensorInfo1.SetQuantizationOffset(qOffset); + inputTensorInfo2.SetQuantizationScale(qScale); + inputTensorInfo2.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input1 = MakeTensor(inputTensorInfo1, QuantizedVector(qScale, qOffset, + { + 0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, + 12.0f, 13.0f, 14.0f, + 15.0f, 16.0f, 17.0f, + })); + + auto input2 = MakeTensor(inputTensorInfo2, QuantizedVector(qScale, qOffset, + { + 0.5f, + })); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, + { + 0.5f, 1.5f, 2.5f, + 3.5f, 4.5f, 5.5f, + 6.5f, 7.5f, 8.5f, + 9.5f, 10.5f, 11.5f, + 12.5f, 13.5f, 14.5f, + 15.5f, 16.5f, 17.5f, + })); + + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr inputHandle2 = 
workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcastTestImpl(workloadFactory, 0.0f, 0); +} + +LayerTestResult AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcastTestImpl(workloadFactory, 2.f, 0); +} + +LayerTestResult AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcast1ElementTestImpl(workloadFactory, 0.0f, 0); +} + +LayerTestResult AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return AdditionBroadcast1ElementTestImpl(workloadFactory, 0.1333333f, 128); +} + +LayerTestResult CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + unsigned int batchSize = 4; + unsigned int channels = 1; + unsigned int height = 2; + unsigned int width = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {batchSize, channels, height, width}; + + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + auto input1 = MakeRandomTensor(inputTensorInfo1, 1232); + auto input2 = MakeRandomTensor(inputTensorInfo2, 456); + + LayerTestResult ret(outputTensorInfo); + + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr inputHandle2Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::AdditionQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo1, inputHandle1Ref.get()); + SetWorkloadInput(refData, refInfo, 1, inputTensorInfo2, inputHandle2Ref.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr workload = 
workloadFactory.CreateAddition(data, info); + std::unique_ptr workloadRef = refWorkloadFactory.CreateAddition(refData, refInfo); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + inputHandle1Ref->Allocate(); + inputHandle2Ref->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + refWorkloadFactory.Finalize(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +namespace { +template +LayerTestResult DivisionTestHelper(armnn::IWorkloadFactory& workloadFactory, + const unsigned int shape0[4], + const std::vector& values0, + float scale0, + int32_t offset0, + const unsigned int shape1[4], + const std::vector & values1, + float scale1, + int32_t offset1, + const unsigned int outShape[4], + const std::vector & outValues, + float outScale, + int32_t outOffset) +{ + auto dataType = (std::is_same::value ? + armnn::DataType::QuantisedAsymm8 : + armnn::DataType::Float32); + + armnn::TensorInfo inputTensorInfo0(4, shape0, dataType); + armnn::TensorInfo inputTensorInfo1(4, shape1, dataType); + armnn::TensorInfo outputTensorInfo(4, outShape, dataType); + + inputTensorInfo0.SetQuantizationScale(scale0); + inputTensorInfo0.SetQuantizationOffset(offset0); + + inputTensorInfo1.SetQuantizationScale(scale1); + inputTensorInfo1.SetQuantizationOffset(offset1); + + outputTensorInfo.SetQuantizationScale(outScale); + outputTensorInfo.SetQuantizationOffset(outOffset); + + auto input0 = MakeTensor(inputTensorInfo0, values0); + auto input1 = MakeTensor(inputTensorInfo1, values1); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, outValues); + + std::unique_ptr inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::DivisionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateDivision(data, info); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + return result; +} +} // anonymous namespace + +LayerTestResult DivisionByZeroTest(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 2; + const unsigned int channelCount = 2; + const unsigned int batchSize = 2; + + unsigned int shape[] = { batchSize, channelCount, height, width }; + + std::vector input0({ + 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, 0.f, 0.f, + -1.f, -1.f, -1.f, -1.f, 5.f, 5.f, 5.f, 5.f }); + 
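The expected results of DivisionByZeroTest follow the IEEE-754 float rules: a non-zero value divided by zero gives a signed infinity, and zero divided by zero gives NaN. A small standalone check of the cases exercised here (it assumes an IEEE-754 platform, which is what the test itself relies on):

#include <cassert>
#include <cmath>
#include <limits>

void CheckIeee754DivisionByZero()
{
    volatile float zero = 0.0f;          // volatile keeps the compiler from folding the divisions
    const float inf = std::numeric_limits<float>::infinity();
    assert(1.0f / zero == inf);          // finite non-zero / zero -> +infinity
    assert(-1.0f / zero == -inf);        // the sign of the result follows the operands
    assert(std::isnan(0.0f / zero));     // zero / zero -> NaN
    assert(5.0f / 5.0f == 1.0f);         // ordinary elements divide as usual
}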
+ std::vector input1({ + 0.f, 0.f, -0.f, -0.f, 0.f, 0.f, -0.f, -0.f, + 0.f, 0.f, -0.f, -0.f, 5.f, 5.f, 5.f, 5.f }); + + std::vector output({ + INFINITY, INFINITY, -INFINITY, -INFINITY, NAN, NAN, -NAN, -NAN, + -INFINITY, -INFINITY, INFINITY, INFINITY, 1, 1, 1, 1 }); + + return DivisionTestHelper(workloadFactory, + shape, input0, 1.0f, 0, + shape, input1, 1.0f, 0, + shape, output, 1.0f, 0); +} + +LayerTestResult DivisionTest(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 2; + const unsigned int channelCount = 2; + const unsigned int batchSize = 2; + + unsigned int shape[] = { batchSize, channelCount, height, width }; + + std::vector input0({ + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5 }); + + std::vector input1({ + 1, 1, 1, 1, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4 }); + + std::vector output({ + 2, 2, 2, 2, 1.5, 1.5, 1.5, 1.5, + 1, 1, 1, 1, 1.25, 1.25, 1.25, 1.25 }); + + + return DivisionTestHelper(workloadFactory, + shape, input0, 1.0f, 0, + shape, input1, 1.0f, 0, + shape, output, 1.0f, 0); +} + +LayerTestResult DivisionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int shape0[] = { 1, 2, 2, 2 }; + std::vector input0({ 2, 4, 6, 8, 10, 12, 14, 16}); + + unsigned int shape1[] = { 1, 1, 1, 1 }; + std::vector input1({ 2 }); + + std::vector output({ 1, 2, 3, 4, 5, 6, 7, 8}); + + + return DivisionTestHelper(workloadFactory, + shape0, input0, 1.0f, 0, + shape1, input1, 1.0f, 0, + shape0, output, 1.0f, 0); +} + +LayerTestResult DivisionBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int shape0[] = { 1, 3, 3, 2 }; + std::vector input0({ + 1, 4, 3, 8, 5, 12, + 7, 16, 9, 20, 11, 24, + 13, 28, 15, 32, 17, 36}); + + unsigned int shape1[] = { 1, 1, 1, 2 }; + std::vector input1({ 1, 2 }); + + std::vector output({ + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18}); + + return DivisionTestHelper(workloadFactory, + shape0, input0, 1.0f, 0, + shape1, input1, 1.0f, 0, + shape0, output, 1.0f, 0); +} + + +LayerTestResult DivisionUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 2; + const unsigned int channelCount = 2; + const unsigned int batchSize = 2; + + unsigned int shape[] = { batchSize, channelCount, height, width }; + + std::vector input0({2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5 }); + + std::vector input1({1, 1, 1, 1, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4 }); + + std::vector output({8, 8, 8, 8, 6, 6, 6, 6, + 4, 4, 4, 4, 5, 5, 5, 5}); + + + return DivisionTestHelper(workloadFactory, + shape, input0, 1.0f, 0, + shape, input1, 1.0f, 0, + shape, output, 0.25f, 0); +} + +LayerTestResult DivisionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int shape0[] = { 1, 2, 2, 2 }; + std::vector input0({ 2, 4, 6, 8, 10, 12, 14, 16}); + + unsigned int shape1[] = { 1, 1, 1, 1 }; + std::vector input1({ 2 }); + + std::vector output({ 1, 2, 3, 4, 5, 6, 7, 8}); + + return DivisionTestHelper(workloadFactory, + shape0, input0, 1.0f, 0, + shape1, input1, 1.0f, 0, + shape0, output, 1.0f, 0); +} + +LayerTestResult DivisionBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int shape0[] = { 1, 3, 3, 2 }; + std::vector input0({1, 4, 3, 8, 5, 12, + 7, 16, 9, 20, 11, 24, + 13, 28, 15, 32, 17, 36}); + + unsigned int shape1[] = { 1, 1, 1, 2 }; + std::vector input1({ 1, 2 }); + + std::vector output({1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, + 13, 14, 15, 
16, 17, 18}); + + return DivisionTestHelper(workloadFactory, + shape0, input0, 1.0f, 0, + shape1, input1, 1.0f, 0, + shape0, output, 1.0f, 0); +} + +namespace { +LayerTestResult MultiplicationTestHelper(armnn::IWorkloadFactory& workloadFactory, + const unsigned int shape0[4], + const std::vector & values0, + const unsigned int shape1[4], + const std::vector & values1, + const unsigned int outShape[4], + const std::vector & outValues) +{ + const size_t dimensionCount = 4; + armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, armnn::DataType::Float32}; + armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, armnn::DataType::Float32}; + armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, armnn::DataType::Float32}; + + auto input0 = MakeTensor(inputTensorInfo0, values0); + auto input1 = MakeTensor(inputTensorInfo1, values1); + + LayerTestResult ret(outputTensorInfo); + + std::unique_ptr inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::MultiplicationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateMultiplication(data, info); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + ret.outputExpected = MakeTensor(outputTensorInfo, outValues); + return ret; +} +} // anonymous namespace + + +LayerTestResult MultiplicationTest(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 2; + const unsigned int channelCount = 2; + const unsigned int batchSize = 2; + + unsigned int shape[] = { batchSize, channelCount, height, width }; + + std::vector input0({ + 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4 }); + + std::vector input1({ + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5 }); + + std::vector output({ + 2, 2, 2, 2, 6, 6, 6, 6, + 12, 12, 12, 12, 20, 20, 20, 20 }); + + return MultiplicationTestHelper(workloadFactory, + shape, + input0, + shape, + input1, + shape, + output); +} + +LayerTestResult MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int shape0[] = { 1, 2, 2, 2 }; + std::vector input0({ 1, 2, 3, 4, 5, 6, 7, 8}); + + unsigned int shape1[] = { 1, 1, 1, 1 }; + std::vector input1({ 2 }); + + std::vector output({ 2, 4, 6, 8, 10, 12, 14, 16}); + + return MultiplicationTestHelper(workloadFactory, + shape0, + input0, + shape1, + input1, + shape0, + output); +} + +LayerTestResult MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int shape0[] = { 1, 3, 3, 2 }; + std::vector input0({ + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18}); + + unsigned int shape1[] = { 1, 1, 1, 2 }; + std::vector input1({ 1, 2 }); + + std::vector output({ + 1, 4, 3, 8, 5, 12, + 7, 16, 9, 20, 11, 24, + 13, 28, 15, 32, 17, 36}); + + return MultiplicationTestHelper(workloadFactory, 
+ shape0, + input0, + shape1, + input1, + shape0, + output); +} + +LayerTestResult CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + const unsigned int width = 16; + const unsigned int height = 32; + const unsigned int channelCount = 2; + const unsigned int batchSize = 5; + + armnn::TensorInfo inputTensorInfo0; + armnn::TensorInfo inputTensorInfo1; + armnn::TensorInfo outputTensorInfo; + + constexpr unsigned int shape[] = { batchSize, channelCount, height, width }; + + inputTensorInfo0 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + LayerTestResult comparisonResult(outputTensorInfo); + + auto input0 = MakeRandomTensor(inputTensorInfo0, 803506992); + auto input1 = MakeRandomTensor(inputTensorInfo1, 54902257); + + std::unique_ptr inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr inputHandle0Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::MultiplicationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::MultiplicationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo0, inputHandle0Ref.get()); + SetWorkloadInput(refData, refInfo, 1, inputTensorInfo1, inputHandle1Ref.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr workload = workloadFactory.CreateMultiplication(data, info); + std::unique_ptr workloadRef = refWorkloadFactory.CreateMultiplication(refData, refInfo); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + inputHandle0Ref->Allocate(); + inputHandle1Ref->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + refWorkloadFactory.Finalize(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get()); + + return comparisonResult; +} + +LayerTestResult CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory) +{ + const unsigned int width = 2; + const unsigned int height = 3; + const unsigned int channels = 5; + const unsigned int batchSize = 3; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo tensorInfo; + + constexpr unsigned int shape[] = {batchSize, channels, height, width}; + 
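CompareBatchNormTest runs the same random inputs through the backend under test and through a reference backend; both are expected to apply the usual per-channel normalisation y = gamma * (x - mean) / sqrt(variance + eps) + beta, with eps set to 0.01f further down. A one-line standalone reference for that formula (illustrative, not the ArmNN implementation):

#include <cmath>

float BatchNormReference(float x, float mean, float variance, float beta, float gamma, float eps)
{
    // Normalise with the per-channel statistics, then scale and shift.
    return gamma * (x - mean) / std::sqrt(variance + eps) + beta;
}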
constexpr unsigned int tensorShape[] = {channels}; + + inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32); + + auto input = MakeRandomTensor(inputTensorInfo, 21312); + + auto mean = MakeRandomTensor(tensorInfo, 123); + auto variance = MakeRandomTensor(tensorInfo, 234, 0.0f); + auto beta = MakeRandomTensor(tensorInfo, 123); + auto gamma = MakeRandomTensor(tensorInfo, 345); + + LayerTestResult ret(outputTensorInfo); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::unique_ptr inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::BatchNormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + armnn::ScopedCpuTensorHandle meanTensor(tensorInfo); + armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo); + armnn::ScopedCpuTensorHandle betaTensor(tensorInfo); + armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo); + + AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]); + AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]); + AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]); + AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]); + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Mean = &meanTensor; + data.m_Variance = &varianceTensor; + data.m_Beta = &betaTensor; + data.m_Gamma = &gammaTensor; + data.m_Parameters.m_Eps = 0.01f; + + armnn::BatchNormalizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr workload = workloadFactory.CreateBatchNormalization(data, info); + std::unique_ptr workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + inputHandleRef->Allocate(); + outputHandleRef->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + refWorkloadFactory.Finalize(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + +template +void PermuteTensorData( + armnn::IWorkloadFactory& workloadFactory, + const armnn::PermutationVector& mappings, + armnn::TensorInfo & inputTensorInfo, + const T * inputData, + std::vector& outputData) +{ + BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null"); + if (inputData == nullptr) + { + // Nullptr is an error in the test. By returning without doing the concatenation + // I expect the caller to fail the test. It still makes sense to report this as + // an assert for Debug builds. 
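The PermuteTensorData helper being defined here reorders the dimensions of a tensor according to a PermutationVector before the concatenation helpers below use it. A rough standalone sketch of such a reordering for the 3-D case, using only the standard library (the convention that source dimension i moves to destination dimension mappings[i] is an assumption of this sketch):

#include <array>
#include <cstddef>
#include <vector>

std::vector<float> Permute3d(const std::vector<float>& src, std::array<std::size_t, 3> srcShape,
                             std::array<std::size_t, 3> mappings)
{
    // Destination shape: dimension i of the source becomes dimension mappings[i].
    std::array<std::size_t, 3> dstShape{};
    for (std::size_t i = 0; i < 3; ++i)
    {
        dstShape[mappings[i]] = srcShape[i];
    }
    std::vector<float> dst(src.size());
    for (std::size_t a = 0; a < srcShape[0]; ++a)
        for (std::size_t b = 0; b < srcShape[1]; ++b)
            for (std::size_t c = 0; c < srcShape[2]; ++c)
            {
                const std::array<std::size_t, 3> s{a, b, c};
                std::array<std::size_t, 3> d{};
                for (std::size_t i = 0; i < 3; ++i)
                {
                    d[mappings[i]] = s[i];
                }
                dst[(d[0] * dstShape[1] + d[1]) * dstShape[2] + d[2]] =
                    src[(a * srcShape[1] + b) * srcShape[2] + c];
            }
    return dst;
}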
+ return; + } + + armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::PermuteQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings}; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), inputData); + + workload->Execute(); + + outputData.resize(outputTensorInfo.GetNumElements()); + CopyDataFromITensorHandle(&outputData[0], outputHandle.get()); + inputTensorInfo = outputTensorInfo; +} + +armnn::OriginsDescriptor CreateMergerDescriptorForConcatenation( + const std::vector & inputTensorInfos, + unsigned int concatDim) +{ + std::vector shapes; + shapes.reserve(inputTensorInfos.size()); + for (const armnn::TensorInfo& it: inputTensorInfos) + { + shapes.push_back(it.GetShape()); + } + + return armnn::CreateMergerDescriptorForConcatenation(shapes.begin(), + shapes.end(), + concatDim); +} + +// +// Concatenation is only supported for N and C dimensions for NCHW. In case of +// <4 dimensions we need to make sure that the concat dimensions are at least +// the 3rd slowest iterating one. +// + +bool NeedPermuteForConcat( + const std::vector & inputTensorInfos, + unsigned int concatDim) +{ + // See note above. Additionally we expect the input shapes to have the + // same number of dimensions. + unsigned int nDimensions = 0; + + // Determine the number of dimensions as well as sanity check them + // agains test implementation issues. + for (auto && tensorInfo : inputTensorInfos) + { + if (!nDimensions) + { + nDimensions = tensorInfo.GetShape().GetNumDimensions(); + } + else + { + BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(), + "Input shapes must have the same number of dimensions"); + } + } + + return (nDimensions-concatDim) < 3; +} + +armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape & inputShape) +{ + unsigned int numDims = inputShape.GetNumDimensions(); + if (numDims >= 3) + { + // Nothing to do if the inputShape has at least 3 dimensions. + return inputShape; + } + + std::vector newDims(size_t(3), 1u); + unsigned int expandedBy = 3 - numDims; + for (unsigned int i=0; i & permutations) +{ + BOOST_ASSERT_MSG(numDimensions <= 3, + "Only dimensions 1,2 and 3 are supported by this helper"); + + unsigned int expandedBy = 3 - numDimensions; + unsigned int expandedConcatAxis = concatDim + expandedBy; + + if (expandedConcatAxis == 2) + { + concatDim = 0; + armnn::PermutationVector forwardPermutation({1, 2, 0}); + armnn::PermutationVector reversePermutation({2, 0, 1}); + permutations = std::make_pair(forwardPermutation, reversePermutation); + } + else if (expandedConcatAxis == 1) + { + concatDim = 0; + armnn::PermutationVector forwardPermutation({2, 0, 1}); + armnn::PermutationVector reversePermutation({1, 2, 0}); + permutations = std::make_pair(forwardPermutation, reversePermutation); + } + else + { + BOOST_ASSERT(expandedConcatAxis == 0); + concatDim = 0; + } +} + +// +// Permute the input tensors so we can do a supported concatenation. 
+// Also treat lower than 3d tensors as 3d by adding dummy 1 dimensions +// at the front. Finally this function tells what the output shape +// of the permuted concatenated tensor is going to be. +// +template +void PermuteInputsForConcat( + armnn::IWorkloadFactory& workloadFactory, + std::vector & inputTensorInfos, + std::vector & inputData, + std::vector> & inputDataStorage, + armnn::PermutationVector & permuteVector, + unsigned int & concatDim, + armnn::TensorInfo & outputTensorInfo) +{ + BOOST_ASSERT_MSG(inputTensorInfos.size() > 1, + "Expecting more than one tensor to be concatenated here"); + + unsigned int numDims = 0; + unsigned int nthInput = 0; + const armnn::PermutationVector identity({0, 1, 2}); + + std::pair permutations = + std::make_pair(identity, identity); + + inputDataStorage.resize(inputData.size()); + + for (auto && tensorInfo : inputTensorInfos) + { + if (numDims == 0) + { + numDims = tensorInfo.GetShape().GetNumDimensions(); + Generate3dPermuteVectorForConcat(numDims, concatDim, permutations); + // Store the reverese permutation. + permuteVector = permutations.second; + BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity), + "Test logic error, we don't need permutation, so we shouldn't arrive here"); + } + else + { + BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(), + "All inputs must have the same number of dimensions"); + } + + armnn::TensorInfo newTensorInfo = tensorInfo; + newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape())); + + PermuteTensorData(workloadFactory, + permutations.first, + newTensorInfo, + inputData[nthInput], + inputDataStorage[nthInput]); + + inputData[nthInput] = inputDataStorage[nthInput].data(); + inputTensorInfos[nthInput] = newTensorInfo; + + ++nthInput; + } + + outputTensorInfo.SetShape( + armnnUtils::Permuted( + ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()), + permutations.first)); +} + + +// +// This is the pair of PermuteInputsForConcat(...) which permutes back +// the output of the concatenation so we can check it against an expected +// output. +// +template +void PermuteOutputForConcat( + armnn::IWorkloadFactory& workloadFactory, + const armnn::TensorInfo & tensorInfo, + const armnn::PermutationVector & permuteVector, + std::unique_ptr && inputDataHandle, + T * data) +{ + BOOST_ASSERT_MSG(data != nullptr, "data must not be null"); + if (data == nullptr) + { + // Nullptr is an error in the test. By returning without doing the permutation + // I expect the caller to fail the test. It still makes sense to report this as + // an assert for Debug builds. + return; + } + + armnn::TensorInfo resultTensorInfo = tensorInfo; + std::vector inputData(tensorInfo.GetNumElements()); + std::vector outputData; + + CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get()); + + PermuteTensorData(workloadFactory, + permuteVector, + resultTensorInfo, + &inputData[0], + outputData); + + ::memcpy(data, &outputData[0], sizeof(T)*outputData.size()); +} + +template +void Concatenate(armnn::IWorkloadFactory& workloadFactory, + std::initializer_list inputTensorInfosOrig, + std::initializer_list inputsOrig, + const armnn::TensorInfo& outputTensorInfoOrig, + T * output, + unsigned int concatDim) +{ + BOOST_ASSERT_MSG(output != nullptr, "output must not be null"); + if (output == nullptr) + { + // Nullptr is an error in the test. By returning without doing the permutation + // I expect the caller to fail the test. It still makes sense to report this as + // an assert for Debug builds. 
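Taken together, the Concatenate helper being defined here falls back to the axis rotation described in the comments above whenever the requested axis is not one the Merger workload supports directly. For the 1-D case exercised later by Concatenation1dTestImpl, and assuming the convention that source dimension i maps to destination dimension mappings[i], the intended data flow is:

    inputs  : three { 3 } tensors, concatenated along dimension 0
    expand  : each input is treated as { 1, 1, 3 }, so the concat axis becomes axis 2
    forward : PermutationVector {1, 2, 0} turns { 1, 1, 3 } into { 3, 1, 1 },
              moving the concat axis to axis 0, which is supported
    concat  : the three { 3, 1, 1 } tensors are merged along axis 0 into { 9, 1, 1 }
    reverse : PermutationVector {2, 0, 1} turns { 9, 1, 1 } back into { 1, 1, 9 },
              which corresponds to the expected { 9 } output once the dummy dimensions are dropped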
+ return; + } + + armnn::MergerQueueDescriptor queueDescriptor; + + // Saves a copy of the parameters which we might need to change. + std::vector inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end()); + std::vector inputs = inputsOrig; + armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig; + + armnn::PermutationVector permuteVector{0, 1, 2}; + + // Holds and automatically releases memory for the reshaped input data. + std::vector> tmpInputDataStorage; + + const size_t inputCount = inputTensorInfos.size(); + + bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim); + + if (needPermuteForConcat) + { + // + // We need to permute the inputs, because concatenation along + // the requested axis is not supported. + // + PermuteInputsForConcat(workloadFactory, + inputTensorInfos, + inputs, + tmpInputDataStorage, + permuteVector, + concatDim, + outputTensorInfo); + } + + armnn::OriginsDescriptor viewsDescriptor = CreateMergerDescriptorForConcatenation(inputTensorInfos, concatDim); + + queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews()); + for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i) + { + queueDescriptor.m_ViewOrigins.emplace_back(std::vector(viewsDescriptor.GetViewOrigin(i), + viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions())); + } + + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + std::vector> inputHandles; + inputHandles.reserve(inputCount); + + const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + for (unsigned int i = 0; i < inputCount; ++i) + { + const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i]; + + std::unique_ptr inputHandle = subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(), + queueDescriptor.m_ViewOrigins[i].m_Origin.data()) + : workloadFactory.CreateTensorHandle(inputTensorInfo); + + inputHandles.emplace_back(std::move(inputHandle)); + } + + armnn::WorkloadInfo workloadInfo; + + for (unsigned int i = 0; i < inputCount; ++i) + { + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get()); + } + + AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateMerger(queueDescriptor, workloadInfo); + + for (auto& inputHandle : inputHandles) + { + inputHandle->Allocate(); + } + + outputHandle->Allocate(); + + unsigned int nextInputId = 0; + for (auto& inputHandle : inputHandles) + { + CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]); + ++nextInputId; + } + + workloadFactory.Finalize(); + workload->Execute(); + + if (needPermuteForConcat) + { + PermuteOutputForConcat(workloadFactory, + outputTensorInfo, + permuteVector, + std::move(outputHandle), + output); + } + else + { + CopyDataFromITensorHandle(output, outputHandle.get()); + } +} + +template +LayerTestResult Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo({ 3 }, armnn::GetDataType()); + + auto input0 = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { 1.0f, 2.0f, 3.0f })); + auto input1 = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { 4.0f, 5.0f, 6.0f })); + auto input2 = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { 7.0f, 8.0f, 9.0f })); + + armnn::TensorInfo outputTensorInfo({ 9 }, armnn::GetDataType()); + + LayerTestResult 
result(outputTensorInfo); + + std::vector output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { inputTensorInfo, inputTensorInfo, inputTensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 0); + + result.output = MakeTensor(outputTensorInfo, output); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f + })); + + return result; +} + +LayerTestResult Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation1dTestImpl(workloadFactory, 0.0f, 0); +} + +template +LayerTestResult Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadFactory, + const armnn::TensorInfo& outputTensorInfo, + unsigned int dimension, + const float qScale, + const int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo({ 2, 3 }, armnn::GetDataType()); + + auto input0 = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + })); + + auto input1 = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 4.0f, 5.0f, 6.0f, + + // Batch 1 + 13.0f, 14.0f, 15.0f, + })); + + auto input2 = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 7.0f, 8.0f, 9.0f, + + // Batch 1 + 16.0f, 17.0f, 18.0f, + })); + + LayerTestResult result(outputTensorInfo); + + std::vector output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { inputTensorInfo, inputTensorInfo, inputTensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + dimension); + + result.output = MakeTensor(outputTensorInfo, output); + return result; +} + +template +LayerTestResult Concatenation2dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType()); + + LayerTestResult result = Concatenation2dTestImpl(workloadFactory, outputTensorInfo, 0, qScale, qOffset); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + + // Batch 2 + 4.0f, 5.0f, 6.0f, + + // Batch 3 + 13.0f, 14.0f, 15.0f, + + // Batch 4 + 7.0f, 8.0f, 9.0f, + + // Batch 5 + 16.0f, 17.0f, 18.0f, + })); + + return result; +} + +LayerTestResult Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim0TestImpl(workloadFactory, 0.0f, 0); +} + +template +LayerTestResult Concatenation2dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType()); + + LayerTestResult result = Concatenation2dTestImpl(workloadFactory, outputTensorInfo, 1, qScale, qOffset); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f + })); + + return result; +} + +LayerTestResult Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1TestImpl(workloadFactory, 0.0f, 0); +} + +template +LayerTestResult Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3 }, 
armnn::GetDataType()); + auto input0 = MakeTensor(input0TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + })); + + armnn::TensorInfo input1TensorInfo({ 3, 3 }, armnn::GetDataType()); + auto input1 = MakeTensor(input1TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 4.0f, 5.0f, 6.0f, + + // Batch 1 + 13.0f, 14.0f, 15.0f, + + // Batch 0 + 7.0f, 8.0f, 9.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 1, 3 }, armnn::GetDataType()); + auto input2 = MakeTensor(input2TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 1 + 16.0f, 17.0f, 18.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType()); + LayerTestResult result(outputTensorInfo); + + std::vector output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 0); + + result.output = MakeTensor(outputTensorInfo, output); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + + // Batch 2 + 4.0f, 5.0f, 6.0f, + + // Batch 3 + 13.0f, 14.0f, 15.0f, + + // Batch 4 + 7.0f, 8.0f, 9.0f, + + // Batch 5 + 16.0f, 17.0f, 18.0f, + })); + + return result; +} + +LayerTestResult Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim0DiffInputDimsTestImpl(workloadFactory, 0.0f, 0); +} + +template +LayerTestResult Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType()); + auto input0 = MakeTensor(input0TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, + })); + + armnn::TensorInfo input1TensorInfo({ 2, 5 }, armnn::GetDataType()); + auto input1 = MakeTensor(input1TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + + // Batch 1 + 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 2, 1 }, armnn::GetDataType()); + auto input2 = MakeTensor(input2TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 9.0f, + + // Batch 1 + 18.0f + })); + + armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType()); + LayerTestResult result(outputTensorInfo); + + std::vector output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 1); + + result.output = MakeTensor(outputTensorInfo, output); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0 + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + + // Batch 1 + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, + })); + + return result; +} + +LayerTestResult Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1DiffInputDimsTestImpl(workloadFactory, 0.0f, 0); +} + +template +LayerTestResult Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadFactory, + const armnn::TensorInfo& outputTensorInfo, + unsigned int dimension, + float qScale, + int32_t qOffset) +{ + armnn::TensorInfo inputTensorInfo({ 
2, 3, 2 }, armnn::GetDataType()); + + auto input0 = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + auto input1 = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, 8.0f, + + // Batch 0, Channel 1 + 9.0f, 10.0f, + + // Batch 0, Channel 2 + 11.0f, 12.0f, + + // Batch 1, Channel 0 + 25.0f, 26.0f, + + // Batch 1, Channel 1 + 27.0f, 28.0f, + + // Batch 1, Channel 2 + 29.0f, 30.0f + })); + + auto input2 = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 13.0f, 14.0f, + + // Batch 0, Channel 1 + 15.0f, 16.0f, + + // Batch 0, Channel 2 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 31.0f, 32.0f, + + // Batch 1, Channel 1 + 33.0f, 34.0f, + + // Batch 1, Channel 2 + 35.0f, 36.0f + })); + + LayerTestResult result(outputTensorInfo); + + std::vector output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { inputTensorInfo, inputTensorInfo, inputTensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + dimension); + + result.output = MakeTensor(outputTensorInfo, output); + return result; +} + +template +LayerTestResult Concatenation3dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType()); + + LayerTestResult result = Concatenation3dTestImpl(workloadFactory, outputTensorInfo, 0, + qScale, qOffset); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 2, Channel 0 + 7.0f, 8.0f, + + // Batch 2, Channel 1 + 9.0f, 10.0f, + + // Batch 2, Channel 2 + 11.0f, 12.0f, + + // Batch 3, Channel 0 + 25.0f, 26.0f, + + // Batch 3, Channel 1 + 27.0f, 28.0f, + + // Batch 3, Channel 2 + 29.0f, 30.0f, + + // Batch 4, Channel 0 + 13.0f, 14.0f, + + // Batch 4, Channel 1 + 15.0f, 16.0f, + + // Batch 4, Channel 2 + 17.0f, 18.0f, + + // Batch 5, Channel 0 + 31.0f, 32.0f, + + // Batch 5, Channel 1 + 33.0f, 34.0f, + + // Batch 5, Channel 2 + 35.0f, 36.0f + })); + return result; +} + +LayerTestResult Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0TestImpl(workloadFactory, 0.0f, 0); +} + +template +LayerTestResult Concatenation3dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, armnn::GetDataType()); + + LayerTestResult result = Concatenation3dTestImpl(workloadFactory, outputTensorInfo, 1, qScale, qOffset); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 0, Channel 3 + 7.0f, 8.0f, + + // Batch 0, Channel 4 + 9.0f, 10.0f, + + // Batch 0, Channel 5 + 11.0f, 12.0f, + + // Batch 0, Channel 6 + 13.0f, 14.0f, + + // Batch 0, Channel 7 + 15.0f, 16.0f, + + // Batch 0, Channel 8 + 17.0f, 18.0f, + + // Batch 1, 
Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 1, Channel 3 + 25.0f, 26.0f, + + // Batch 1, Channel 4 + 27.0f, 28.0f, + + // Batch 1, Channel 5 + 29.0f, 30.0f, + + // Batch 1, Channel 6 + 31.0f, 32.0f, + + // Batch 1, Channel 7 + 33.0f, 34.0f, + + // Batch 1, Channel 8 + 35.0f, 36.0f + })); + + return result; +} + +LayerTestResult Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1TestImpl(workloadFactory, 0.0f, 0); +} + +template +LayerTestResult Concatenation3dDim2TestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, int32_t qOffset) +{ + armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType()); + + LayerTestResult result = Concatenation3dTestImpl(workloadFactory, outputTensorInfo, 2, qScale, qOffset); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f, + })); + + return result; +} + +LayerTestResult Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2TestImpl(workloadFactory, 0.0f, 0); +} + +template +LayerTestResult Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType()); + auto input0 = MakeTensor(input0TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, armnn::GetDataType()); + auto input1 = MakeTensor(input1TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, 8.0f, + + // Batch 0, Channel 1 + 9.0f, 10.0f, + + // Batch 0, Channel 2 + 11.0f, 12.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, armnn::GetDataType()); + auto input2 = MakeTensor(input2TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 25.0f, 26.0f, + + // Batch 0, Channel 1 + 27.0f, 28.0f, + + // Batch 0, Channel 2 + 29.0f, 30.0f, + + // Batch 1, Channel 0 + 13.0f, 14.0f, + + // Batch 1, Channel 1 + 15.0f, 16.0f, + + // Batch 1, Channel 2 + 17.0f, 18.0f, + + // Batch 2, Channel 0 + 31.0f, 32.0f, + + // Batch 2, Channel 1 + 33.0f, 34.0f, + + // Batch 2, Channel 2 + 35.0f, 36.0f + })); + + armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType()); + LayerTestResult result(outputTensorInfo); + + std::vector output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 0); + + result.output = MakeTensor(outputTensorInfo, output); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 
5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 2, Channel 0 + 7.0f, 8.0f, + + // Batch 2, Channel 1 + 9.0f, 10.0f, + + // Batch 2, Channel 2 + 11.0f, 12.0f, + + // Batch 3, Channel 0 + 25.0f, 26.0f, + + // Batch 3, Channel 1 + 27.0f, 28.0f, + + // Batch 3, Channel 2 + 29.0f, 30.0f, + + // Batch 4, Channel 0 + 13.0f, 14.0f, + + // Batch 4, Channel 1 + 15.0f, 16.0f, + + // Batch 4, Channel 2 + 17.0f, 18.0f, + + // Batch 5, Channel 0 + 31.0f, 32.0f, + + // Batch 5, Channel 1 + 33.0f, 34.0f, + + // Batch 5, Channel 2 + 35.0f, 36.0f + })); + + return result; +} + +LayerTestResult Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0DiffInputDimsTestImpl(workloadFactory, 0.0f, 0); +} + +template +LayerTestResult Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType()); + auto input0 = MakeTensor(input0TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, armnn::GetDataType()); + auto input1 = MakeTensor(input1TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, 8.0f, + + // Batch 0, Channel 1 + 9.0f, 10.0f, + + // Batch 0, Channel 2 + 11.0f, 12.0f, + + // Batch 0, Channel 3 + 25.0f, 26.0f, + + // Batch 1, Channel 0 + 27.0f, 28.0f, + + // Batch 1, Channel 1 + 29.0f, 30.0f, + + // Batch 1, Channel 2 + 13.0f, 14.0f, + + // Batch 1, Channel 3 + 15.0f, 16.0f, + })); + + armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, armnn::GetDataType()); + auto input2 = MakeTensor(input2TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 31.0f, 32.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, armnn::GetDataType()); + LayerTestResult result(outputTensorInfo); + + std::vector output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 1); + + result.output = MakeTensor(outputTensorInfo, output); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 0, Channel 3 + 7.0f, 8.0f, + + // Batch 0, Channel 4 + 9.0f, 10.0f, + + // Batch 0, Channel 5 + 11.0f, 12.0f, + + // Batch 0, Channel 6 + 25.0f, 26.0f, + + // Batch 0, Channel 7 + 17.0f, 18.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, + + // Batch 1, Channel 3 + 27.0f, 28.0f, + + // Batch 1, Channel 4 + 29.0f, 30.0f, + + // Batch 1, Channel 5 + 13.0f, 14.0f, + + // Batch 1, Channel 6 + 15.0f, 16.0f, + + // Batch 1, Channel 7 + 31.0f, 32.0f, + })); + + return result; +} + +LayerTestResult Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1DiffInputDimsTestImpl(workloadFactory, 0.0f, 0); +} + +template 
+LayerTestResult Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, + int32_t qOffset) +{ + armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType()); + auto input0 = MakeTensor(input0TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f + })); + + armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, armnn::GetDataType()); + auto input1 = MakeTensor(input1TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 7.0f, + + // Batch 0, Channel 1 + 9.0f, + + // Batch 0, Channel 2 + 11.0f, + + // Batch 1, Channel 0 + 25.0f, + + // Batch 1, Channel 1 + 27.0f, + + // Batch 1, Channel 2 + 29.0f + })); + + armnn::TensorInfo input2TensorInfo({ 2, 3, 3 }, armnn::GetDataType()); + auto input2 = MakeTensor(input2TensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 13.0f, 14.0f, 50.0f, + + // Batch 0, Channel 1 + 15.0f, 16.0f, 51.0f, + + // Batch 0, Channel 2 + 17.0f, 18.0f, 52.0f, + + // Batch 1, Channel 0 + 31.0f, 32.0f, 53.0f, + + // Batch 1, Channel 1 + 33.0f, 34.0f, 54.0f, + + // Batch 1, Channel 2 + 35.0f, 36.0f, 55.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType()); + LayerTestResult result(outputTensorInfo); + + std::vector output; + output.resize(outputTensorInfo.GetNumElements()); + Concatenate(workloadFactory, + { input0TensorInfo, input1TensorInfo, input2TensorInfo }, + { input0.data(), input1.data(), input2.data() }, + outputTensorInfo, + output.data(), + 2); + + result.output = MakeTensor(outputTensorInfo, output); + result.outputExpected = MakeTensor(outputTensorInfo, QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f, + + // Batch 0, Channel 1 + 3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f, + + // Batch 0, Channel 2 + 5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f, + + // Batch 1, Channel 0 + 19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f, + + // Batch 1, Channel 1 + 21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f, + + // Batch 1, Channel 2 + 23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f, + })); + + return result; +} + +LayerTestResult Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2DiffInputDimsTestImpl(workloadFactory, 0.0f, 0); +} + +LayerTestResult ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1.0f, 2.0f, 3.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 5.0f, + 3.0f, 4.0f, 5.0f, 6.0f, + 4.0f, 5.0f, 6.0f, 7.0f + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = input; + + std::unique_ptr inputHandle = 
workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 2; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1.0f, 255.0f, + 200.0f, 250.f, + })); + + // The 'resize bilinear' operation projects the top-left corner of output texels into the input image, + // then figures out the interpolants and weights. Note this is different to projecting the centre of the + // output texel - and thus we'll expect the output 1x1 matrix to contain, as its single element, the value + // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting + // the centre). 
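    // A worked sketch of that mapping for this 2x2 -> 1x1 case (editorial illustration, inferred from
    // the description above rather than taken from the implementation):
    //   scaleX = inputWidth / outputWidth = 2,  scaleY = inputHeight / outputHeight = 2,
    //   output texel (0,0) samples input (0 * scaleX, 0 * scaleY) = (0,0),
    // so the expected 1x1 output is input[0][0][0][0] = 1.0f rather than the centre-projected
    // average (1 + 255 + 200 + 250) / 4 = 176.5f.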
+ LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, std::vector({ + 1.0f + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1.0f, 2.0f, 3.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 5.0f, + 3.0f, 4.0f, 5.0f, 6.0f, + 4.0f, 5.0f, 6.0f, 7.0f + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, std::vector({ + 1.f, 3.f, + 3.f, 5.f + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 5; + constexpr unsigned int inputHeight = 3; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 3; + constexpr unsigned int outputHeight = 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1.0f, 2.0f, 
3.0f, 5.0f, 8.0f, + 13.0f, 21.0f, 34.0f, 55.0f, 89.0f, + 144.0f, 233.0f, 377.0f, 610.0f, 987.0f + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, std::vector({ + 1.0f, 2.6666f, 6.0f, + 78.5f, 179.3333f, 401.f + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 3; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 5; + constexpr unsigned int outputHeight = 3; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1.0f, 2.0f, + 13.0f, 21.0f, + 144.0f, 233.0f + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, std::vector({ + 1.0f, 1.4f, 1.8f, 2.f, 2.f, + 13.f, 16.2f, 19.4f, 21.f, 21.f, + 144.f, 179.6f, 215.2f, 233.f, 233.f + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int width = 2; + constexpr unsigned int height = 3; + + const armnn::TensorInfo tensorInfo({height, width }, + armnn::DataType::Float32); + auto input = MakeTensor(tensorInfo, std::vector({ + -10.0f, -5.0f, + 0.0f, 5.0f, + 10.0f, 10.0f + })); + + LayerTestResult ret(tensorInfo); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(tensorInfo); + + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(tensorInfo); + + armnn::FakeQuantizationQueueDescriptor data; + armnn::WorkloadInfo info; + + AddInputToWorkload(data, info, 
tensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, tensorInfo, outputHandle.get()); + float min = -10.f; + float max = 10.f; + + data.m_Parameters.m_Min = min; + data.m_Parameters.m_Max = max; + + armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]); + armnn::FakeQuantizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle); + + std::unique_ptr workload = workloadFactory.CreateFakeQuantization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + ret.outputExpected = MakeTensor(tensorInfo, std::vector({ + 0.0f, 63.0f, + 128.0f, 191.0f, + 255.0f, 255.0f + })); + return ret; +} + +LayerTestResult L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 1; + constexpr unsigned int inputHeight = 1; + constexpr unsigned int inputChannels = 10; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f + })); + + const float approxInvL2Norm = 0.050964719f; + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(inputTensorInfo, std::vector({ + 1.0f * approxInvL2Norm, + 2.0f * approxInvL2Norm, + 3.0f * approxInvL2Norm, + 4.0f * approxInvL2Norm, + 5.0f * approxInvL2Norm, + 6.0f * approxInvL2Norm, + 7.0f * approxInvL2Norm, + 8.0f * approxInvL2Norm, + 9.0f * approxInvL2Norm, + 10.0f * approxInvL2Norm + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +namespace +{ + +float CalcInvL2Norm(std::initializer_list elements) +{ + const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f, + [](float acc, float element) { return acc + element * element; }); + return 1.0f / sqrtf(reduction); +} + +} + +LayerTestResult L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 5; + constexpr unsigned int inputHeight = 1; + constexpr unsigned int inputChannels = 2; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int 
outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1.0f, 3.0f, 5.0f, 7.0f, 9.0f, + 2.0f, 4.0f, 6.0f, 8.0f, 10.0f + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(inputTensorInfo, std::vector({ + 1.0f * CalcInvL2Norm({ 1.0f, 2.0f }), + 3.0f * CalcInvL2Norm({ 3.0f, 4.0f }), + 5.0f * CalcInvL2Norm({ 5.0f, 6.0f }), + 7.0f * CalcInvL2Norm({ 7.0f, 8.0f }), + 9.0f * CalcInvL2Norm({ 9.0f, 10.0f }), + + 2.0f * CalcInvL2Norm({ 1.0f, 2.0f }), + 4.0f * CalcInvL2Norm({ 3.0f, 4.0f }), + 6.0f * CalcInvL2Norm({ 5.0f, 6.0f }), + 8.0f * CalcInvL2Norm({ 7.0f, 8.0f }), + 10.0f * CalcInvL2Norm({ 9.0f, 10.0f }) + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 2; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + // Channel 0 + 119.0f, 21.0f, 150.0f, + 149.0f, 32.0f, 179.0f, + 15.0f, 227.0f, 141.0f, + 147.0f, 199.0f, 220.0f, + + // Channel 1 + 110.0f, 140.0f, 73.0f, + 211.0f, 212.0f, 89.0f, + 24.0f, 138.0f, 188.0f, + 162.0f, 12.0f, 161.0f, + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(inputTensorInfo, std::vector({ + 119.0f * CalcInvL2Norm({ 119.0f, 110.0f }), + 21.0f * CalcInvL2Norm({ 21.0f, 140.0f }), + 150.0f * CalcInvL2Norm({ 150.0f, 73.0f }), + 149.0f * CalcInvL2Norm({ 149.0f, 211.0f }), + 32.0f * CalcInvL2Norm({ 32.0f, 212.0f }), + 179.0f * CalcInvL2Norm({ 179.0f, 89.0f }), + 15.0f * CalcInvL2Norm({ 15.0f, 24.0f }), + 227.0f * CalcInvL2Norm({ 227.0f, 138.0f }), + 141.0f * CalcInvL2Norm({ 141.0f, 188.0f }), + 147.0f * CalcInvL2Norm({ 147.0f, 162.0f }), + 199.0f * CalcInvL2Norm({ 199.0f, 12.0f }), + 
220.0f * CalcInvL2Norm({ 220.0f, 161.0f }), + + 110.0f * CalcInvL2Norm({ 119.0f, 110.0f }), + 140.0f * CalcInvL2Norm({ 21.0f, 140.0f }), + 73.0f * CalcInvL2Norm({ 150.0f, 73.0f }), + 211.0f * CalcInvL2Norm({ 149.0f, 211.0f }), + 212.0f * CalcInvL2Norm({ 32.0f, 212.0f }), + 89.0f * CalcInvL2Norm({ 179.0f, 89.0f }), + 24.0f * CalcInvL2Norm({ 15.0f, 24.0f }), + 138.0f * CalcInvL2Norm({ 227.0f, 138.0f }), + 188.0f * CalcInvL2Norm({ 141.0f, 188.0f }), + 162.0f * CalcInvL2Norm({ 147.0f, 162.0f }), + 12.0f * CalcInvL2Norm({ 199.0f, 12.0f }), + 161.0f * CalcInvL2Norm({ 220.0f, 161.0f }), + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 3; + constexpr unsigned int inputBatchSize = 2; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::Float32); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + // Batch 0, Channel 0 + 235.0f, 46.0f, 178.0f, + 100.0f, 123.0f, 19.0f, + 172.0f, 74.0f, 250.0f, + 6.0f, 195.0f, 80.0f, + + // Batch 0, Channel 1 + 113.0f, 95.0f, 202.0f, + 77.0f, 114.0f, 71.0f, + 122.0f, 246.0f, 166.0f, + 82.0f, 28.0f, 37.0f, + + // Batch 0, Channel 2 + 56.0f, 170.0f, 162.0f, + 194.0f, 89.0f, 254.0f, + 12.0f, 209.0f, 200.0f, + 1.0f, 64.0f, 54.0f, + + // Batch 1, Channel 0 + 67.0f, 90.0f, 49.0f, + 7.0f, 163.0f, 18.0f, + 25.0f, 117.0f, 103.0f, + 247.0f, 59.0f, 189.0f, + + // Batch 1, Channel 1 + 239.0f, 104.0f, 199.0f, + 17.0f, 124.0f, 153.0f, + 222.0f, 217.0f, 75.0f, + 32.0f, 126.0f, 21.0f, + + // Batch 1, Channel 2 + 97.0f, 145.0f, 215.0f, + 115.0f, 116.0f, 238.0f, + 226.0f, 16.0f, 132.0f, + 92.0f, 125.0f, 88.0f, + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(inputTensorInfo, std::vector({ + + // Batch 0, Channel 0 + 235.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), + 46.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), + 178.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), + 100.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), + 123.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), + 19.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), + 172.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), + 74.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), + 250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), + 6.0f * 
CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), + 195.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), + 80.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), + + // Batch 0, Channel 1 + 113.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), + 95.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), + 202.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), + 77.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), + 114.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), + 71.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), + 122.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), + 246.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), + 166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), + 82.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), + 28.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), + 37.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), + + // Batch 0, Channel 2 + 56.0f * CalcInvL2Norm({ 235.0f, 113.0f, 56.0f }), + 170.0f * CalcInvL2Norm({ 46.0f, 95.0f, 170.0f }), + 162.0f * CalcInvL2Norm({ 178.0f, 202.0F, 162.0f }), + 194.0f * CalcInvL2Norm({ 100.0f, 77.0f, 194.0f }), + 89.0f * CalcInvL2Norm({ 123.0f, 114.0f, 89.0f }), + 254.0f * CalcInvL2Norm({ 19.0f, 71.0f, 254.0f }), + 12.0f * CalcInvL2Norm({ 172.0f, 122.0f, 12.0f }), + 209.0f * CalcInvL2Norm({ 74.0f, 246.0f, 209.0f }), + 200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }), + 1.0f * CalcInvL2Norm({ 6.0f, 82.0f, 1.0f }), + 64.0f * CalcInvL2Norm({ 195.0f, 28.0f, 64.0f }), + 54.0f * CalcInvL2Norm({ 80.0f, 37.0f, 54.0f }), + + // Batch 1, Channel 0 + 67.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), + 90.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), + 49.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), + 7.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), + 163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), + 18.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), + 25.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), + 117.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), + 103.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), + 247.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), + 59.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), + 189.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), + + // Batch 1, Channel 1 + 239.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), + 104.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), + 199.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), + 17.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), + 124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), + 153.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), + 222.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), + 217.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), + 75.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), + 32.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), + 126.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), + 21.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), + + // Batch 1, Channel 2 + 97.0f * CalcInvL2Norm({ 67.0f, 239.0f, 97.0f }), + 145.0f * CalcInvL2Norm({ 90.0f, 104.0f, 145.0f }), + 215.0f * CalcInvL2Norm({ 49.0f, 199.0f, 215.0f }), + 115.0f * CalcInvL2Norm({ 7.0f, 17.0f, 115.0f }), + 116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }), + 238.0f * CalcInvL2Norm({ 18.0f, 153.0f, 238.0f }), + 226.0f * CalcInvL2Norm({ 25.0f, 222.0f, 226.0f }), + 16.0f * CalcInvL2Norm({ 117.0f, 217.0f, 16.0f }), + 132.0f * CalcInvL2Norm({ 103.0f, 75.0f, 132.0f }), + 92.0f * CalcInvL2Norm({ 247.0f, 32.0f, 92.0f }), + 125.0f * CalcInvL2Norm({ 59.0f, 126.0f, 125.0f }), + 88.0f * CalcInvL2Norm({ 189.0f, 21.0f, 88.0f }), + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = 
workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::L2NormalizationQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateL2Normalization(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +template +LayerTestResult ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory, + float qScale, + int32_t qOffset) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 3; + constexpr unsigned int inputBatchSize = 2; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType()); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, std::vector( + QuantizedVector(qScale, qOffset, { + // Batch 0, Channel 0 + 235.0f, 46.0f, 178.0f, + 100.0f, 123.0f, 19.0f, + 172.0f, 74.0f, 250.0f, + 6.0f, 195.0f, 80.0f, + + // Batch 0, Channel 1 + 113.0f, 95.0f, 202.0f, + 77.0f, 114.0f, 71.0f, + 122.0f, 246.0f, 166.0f, + 82.0f, 28.0f, 37.0f, + + // Batch 0, Channel 2 + 56.0f, 170.0f, 162.0f, + 194.0f, 89.0f, 254.0f, + 12.0f, 209.0f, 200.0f, + 1.0f, 64.0f, 54.0f, + + // Batch 1, Channel 0 + 67.0f, 90.0f, 49.0f, + 7.0f, 163.0f, 18.0f, + 25.0f, 117.0f, 103.0f, + 247.0f, 59.0f, 189.0f, + + // Batch 1, Channel 1 + 239.0f, 104.0f, 199.0f, + 17.0f, 124.0f, 153.0f, + 222.0f, 217.0f, 75.0f, + 32.0f, 126.0f, 21.0f, + + // Batch 1, Channel 2 + 97.0f, 145.0f, 215.0f, + 115.0f, 116.0f, 238.0f, + 226.0f, 16.0f, 132.0f, + 92.0f, 125.0f, 88.0f, + }))); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = input; + + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo); + AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]); + + armnn::ConstantQueueDescriptor descriptor; + descriptor.m_LayerOutput = &constantTensor; + + armnn::WorkloadInfo info; + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateConstant(descriptor, info); + + outputHandle->Allocate(); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult ConstantTest(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantTestImpl(workloadFactory, 0.0f, 0); +} + +LayerTestResult 
ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantTestImpl(workloadFactory, 1.0f, 0); +} + +LayerTestResult MergerUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int outputWidth = 3; + unsigned int outputHeight = 6; + unsigned int outputChannels = 3; + + unsigned int inputWidth1 = 3; + unsigned int inputHeight1 = 6; + unsigned int inputChannels1 = 2; + + unsigned int inputWidth2 = 3; + unsigned int inputHeight2 = 6; + unsigned int inputChannels2 = 1; + + // Defines the tensor descriptors. + armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8); + + // Arbitrary scale and offsets. They don't really matter as the merger operator doesn't dequantize/quantize them. + const float scale = 0.13497836f; + const int32_t offset = -7; + + outputTensorInfo.SetQuantizationScale(scale); + outputTensorInfo.SetQuantizationOffset(offset); + inputTensorInfo1.SetQuantizationScale(scale); + inputTensorInfo1.SetQuantizationOffset(offset); + inputTensorInfo2.SetQuantizationScale(scale); + inputTensorInfo2.SetQuantizationOffset(offset); + + LayerTestResult ret(outputTensorInfo); + + ret.outputExpected = MakeTensor(outputTensorInfo, std::vector( + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10, 11, 12, + 13, 14, 15, + 16, 17, 18, + + 19, 20, 21, + 22, 23, 24, + 25, 26, 27, + 28, 29, 30, + 31, 32, 33, + 34, 35, 36, + + 37, 38, 39, + 40, 41, 42, + 43, 44, 45, + 46, 47, 48, + 49, 50, 51, + 52, 53, 54, + }) + ); + + auto input1 = MakeTensor(inputTensorInfo1, std::vector( + { + 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 10, 11, 12, + 13, 14, 15, + 16, 17, 18, + + 19, 20, 21, + 22, 23, 24, + 25, 26, 27, + 28, 29, 30, + 31, 32, 33, + 34, 35, 36, + }) + ); + + auto input2 = MakeTensor(inputTensorInfo2, std::vector( + { + 37, 38, 39, + 40, 41, 42, + 43, 44, 45, + 46, 47, 48, + 49, 50, 51, + 52, 53, 54, + }) + ); + + std::vector wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0]. + armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1); + + std::vector wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1]. + armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2); + + + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr inputHandle1 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo1); + + std::unique_ptr inputHandle2 = + subTensorsSupported ? 
+ workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(inputTensorInfo2); + + + armnn::MergerQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + data.m_ViewOrigins.push_back(window1); + data.m_ViewOrigins.push_back(window2); + + std::unique_ptr workload = workloadFactory.CreateMerger(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int batchSize = 1; + unsigned int channels = 2; + unsigned int height = 2; + unsigned int width = 3; + + const float scale = 7.0f; + const int32_t offset = 3; + + armnn::TensorInfo inputTensorInfo1, inputTensorInfo2; + armnn::TensorInfo outputTensorInfo; + + const unsigned int shape[] = { batchSize, channels, height, width }; + inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo1.SetQuantizationScale(scale); + inputTensorInfo1.SetQuantizationOffset(offset); + + inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo2.SetQuantizationScale(scale); + inputTensorInfo2.SetQuantizationOffset(offset); + + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(scale); + outputTensorInfo.SetQuantizationOffset(offset); + + // See dequantized values to the right. + auto input1 = MakeTensor(inputTensorInfo1, std::vector( + { + 63, 35, 77, 70, 56, 112, // 420, 224, 518, 469, 371, 763 + 203, 28, 252, 168, 245, 91 // 1400, 175, 1743, 1155, 1694, 616 + })); + + // See dequantized values to the right. + auto input2 = MakeTensor(inputTensorInfo1, std::vector( + { + 21, 7, 175, 231, 175, 210, // 126, 28, 1204, 1596, 1204, 1449 + 126, 161, 63, 21, 105, 126 // 861, 1106, 420, 126, 714, 861 + })); + + // See dequantized values to the right. 
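    // Worked example for the first element (editorial sketch of the expected arithmetic): with
    // scale = 7 and offset = 3, input1[0] = 63 dequantizes to 7 * (63 - 3) = 420 and
    // input2[0] = 21 to 7 * (21 - 3) = 126; the real sum 546 requantizes to 546 / 7 + 3 = 81,
    // the first value below. Sums above 7 * (255 - 3) = 1764 cannot be represented and clamp to 255.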
+ LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, std::vector( + { + 81, 39, 249, 255, 228, 255, // 546, 252, 1722, 2065(clamped), 1575, 2212(clamped) + 255, 186, 255, 186, 255, 214, // 2261(clamped), 1281, 2163(clamped), 1281, 2408(clamped), 1477 + })); + + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::AdditionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateAddition(data, info); + + inputHandle1->Allocate(); + inputHandle2->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + return result; +} + +namespace +{ +LayerTestResult MultiplicationUint8TestHelper(armnn::IWorkloadFactory& workloadFactory, + const unsigned int shape0[4], + const std::vector & values0, + float scale0, + int32_t offset0, + const unsigned int shape1[4], + const std::vector & values1, + float scale1, + int32_t offset1, + const unsigned int outShape[4], + const std::vector & outValues, + float outScale, + int32_t outOffset) +{ + armnn::TensorInfo inputTensorInfo0(4, shape0, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo inputTensorInfo1(4, shape1, armnn::DataType::QuantisedAsymm8); + armnn::TensorInfo outputTensorInfo(4, outShape, armnn::DataType::QuantisedAsymm8); + + inputTensorInfo0.SetQuantizationScale(scale0); + inputTensorInfo0.SetQuantizationOffset(offset0); + + inputTensorInfo1.SetQuantizationScale(scale1); + inputTensorInfo1.SetQuantizationOffset(offset1); + + outputTensorInfo.SetQuantizationScale(outScale); + outputTensorInfo.SetQuantizationOffset(outOffset); + + auto input0 = MakeTensor(inputTensorInfo0, values0); + auto input1 = MakeTensor(inputTensorInfo1, values1); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, outValues); + + std::unique_ptr inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::MultiplicationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateMultiplication(data, info); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + return result; +} +} // anonymous namespace + 
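The quantized multiplication tests below rely on dequantize-multiply-requantize-clamp arithmetic. A minimal, self-contained sketch of that rule follows; the helper name and the truncation-based rounding are illustrative assumptions, not part of this patch.

namespace
{
// Editorial sketch: dequantize both operands, multiply the real values, then requantize with the
// output scale/offset and clamp to the uint8 range (nearest-integer rounding for positive values).
inline uint8_t RequantizeProductSketch(uint8_t a, float scaleA, int32_t offsetA,
                                       uint8_t b, float scaleB, int32_t offsetB,
                                       float outScale, int32_t outOffset)
{
    const float real = (scaleA * (static_cast<int32_t>(a) - offsetA)) *
                       (scaleB * (static_cast<int32_t>(b) - offsetB));
    const int32_t q = static_cast<int32_t>(real / outScale + 0.5f) + outOffset;
    return static_cast<uint8_t>(q < 0 ? 0 : (q > 255 ? 255 : q));
}
} // anonymous namespace

For example, RequantizeProductSketch(62, 4.0f, 1, 126, 3.0f, -2, 1366.255f, -5) gives 64, matching the first expected output element of MultiplicationUint8Test below, while values that land outside [0, 255] after requantization clamp, as the in-line comments there note.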
+LayerTestResult MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + unsigned int batchSize = 1; + unsigned int channels = 2; + unsigned int height = 2; + unsigned int width = 3; + const unsigned int shape[] = { batchSize, channels, height, width }; + + // See dequantized values to the right. + std::vector input0({ + 62, 37, 3, 172, 13, 111, // 244, 144, 8, 684, 48, 440, + 188, 20, 73, 31, 23, 31 // 748, 76, 288, 120, 88, 120 + }); + + // See dequantized values to the right. + std::vector input1({ + 126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747, + 48, 115, 151, 79, 78, 97 // 150, 351, 459, 243, 240, 297 + }); + + // See dequantized values to the right. + std::vector output( + { + 64, 72, 0, 255, 8, 236, // 93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680, + 77, 15, 92, 16, 10, 21, // 112200, 26676, 132192, 29160, 21120, 35640 + }); + + return MultiplicationUint8TestHelper(workloadFactory, + shape, + input0, + 4.0f, + 1, + shape, + input1, + 3.0f, + -2, + shape, + output, + 1366.255f, // Scale/offset chosen to have output values out of range. + -5); +} + +LayerTestResult MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int shape0[] = { 1, 2, 2, 3 }; + const unsigned int shape1[] = { 1, 1, 1, 1 }; + + std::vector input0({ + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12 + }); + + std::vector input1({2}); + + std::vector output({ + 2, 4, 6, 8, 10, 12, + 14, 16, 18, 20, 22, 24 + }); + + return MultiplicationUint8TestHelper(workloadFactory, + shape0, + input0, + 1.0f, + 0, + shape1, + input1, + 1.0f, + 0, + shape0, + output, + 1.0f, + 0); +} + +LayerTestResult MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int shape0[] = { 1, 2, 2, 3 }; + const unsigned int shape1[] = { 1, 1, 1, 3 }; + + std::vector input0({ + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12 + }); + + std::vector input1({1, 2, 3}); + + std::vector output({ + 1, 4, 9, 4, 10, 18, + 7, 16, 27, 10, 22, 36 + }); + + return MultiplicationUint8TestHelper(workloadFactory, + shape0, + input0, + 1.0f, + 0, + shape1, + input1, + 1.0f, + 0, + shape0, + output, + 1.0f, + 0); +} + +namespace +{ +template +LayerTestResult SubtractionTestHelper(armnn::IWorkloadFactory& workloadFactory, + const unsigned int shape0[4], + const std::vector& values0, + float scale0, + int32_t offset0, + const unsigned int shape1[4], + const std::vector & values1, + float scale1, + int32_t offset1, + const unsigned int outShape[4], + const std::vector & outValues, + float outScale, + int32_t outOffset) +{ + auto dataType = (std::is_same::value ? 
+ armnn::DataType::QuantisedAsymm8 : + armnn::DataType::Float32); + + armnn::TensorInfo inputTensorInfo0(4, shape0, dataType); + armnn::TensorInfo inputTensorInfo1(4, shape1, dataType); + armnn::TensorInfo outputTensorInfo(4, outShape, dataType); + + inputTensorInfo0.SetQuantizationScale(scale0); + inputTensorInfo0.SetQuantizationOffset(offset0); + + inputTensorInfo1.SetQuantizationScale(scale1); + inputTensorInfo1.SetQuantizationOffset(offset1); + + outputTensorInfo.SetQuantizationScale(outScale); + outputTensorInfo.SetQuantizationOffset(outOffset); + + auto input0 = MakeTensor(inputTensorInfo0, values0); + auto input1 = MakeTensor(inputTensorInfo1, values1); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, outValues); + + std::unique_ptr inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0); + std::unique_ptr inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::SubtractionQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); + AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateSubtraction(data, info); + + inputHandle0->Allocate(); + inputHandle1->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]); + CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + return result; +} +} // anonymous namespace + +LayerTestResult SubtractionUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int shape0[] = { 1, 1, 2, 2 }; + const unsigned int shape1[] = { 1, 1, 2, 2 }; + + std::vector input0({ 10, 12, 14, 16 }); + std::vector input1({ 1, 2, 1, 2 }); + std::vector output({ 3, 3, 5, 5 }); + + return SubtractionTestHelper(workloadFactory, + shape0, input0, 0.5f, 2, + shape1, input1, 1.0f, 0, + shape0, output, 1.0f, 0); +} + +LayerTestResult SubtractionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int shape0[] = { 1, 1, 2, 2 }; + const unsigned int shape1[] = { 1, 1, 1, 1 }; + + std::vector input0({ 10, 12, 14, 16 }); + std::vector input1({ 2 }); + std::vector output({ 5, 6, 7, 8 }); + + return SubtractionTestHelper(workloadFactory, + shape0, input0, 0.5f, 2, + shape1, input1, 1.0f, 0, + shape0, output, 1.0f, 3); +} + +LayerTestResult SubtractionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int shape0[] = { 1, 1, 2, 2 }; + const unsigned int shape1[] = { 1, 1, 2, 1 }; + + std::vector input0({ 10, 12, 14, 16 }); + std::vector input1({ 2, 1 }); + std::vector output({ 8, 11, 12, 15 }); + + return SubtractionTestHelper(workloadFactory, + shape0, input0, 1.0f, 0, + shape1, input1, 1.0f, 0, + shape0, output, 1.0f, 0); +} + +LayerTestResult SubtractionTest(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int shape0[] = { 1, 1, 2, 2 }; + const unsigned int shape1[] = { 1, 1, 2, 2 }; + + std::vector input0({ 1, 2, 3, 4 }); + std::vector input1({ 1, -1, 0, 2 }); + std::vector output({ 0, 3, 3, 2 }); + + return SubtractionTestHelper(workloadFactory, + shape0, input0, 1.0f, 0, + shape1, input1, 1.0f, 0, + shape0, output, 
1.0f, 0); +} + +LayerTestResult SubtractionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int shape0[] = { 1, 1, 2, 2 }; + const unsigned int shape1[] = { 1, 1, 1, 1 }; + + std::vector input0({ 1, 2, 3, 4 }); + std::vector input1({ 10 }); + std::vector output({ -9, -8, -7, -6 }); + + return SubtractionTestHelper(workloadFactory, + shape0, input0, 1.0f, 0, + shape1, input1, 1.0f, 0, + shape0, output, 1.0f, 0); +} + +LayerTestResult SubtractionBroadcastTest(armnn::IWorkloadFactory& workloadFactory) +{ + const unsigned int shape0[] = { 1, 1, 2, 2 }; + const unsigned int shape1[] = { 1, 1, 1, 2 }; + + std::vector input0({ 1, 2, 3, 4 }); + std::vector input1({ 10, -5 }); + std::vector output({ -9, 7, -7, 9 }); + + return SubtractionTestHelper(workloadFactory, + shape0, input0, 1.0f, 0, + shape1, input1, 1.0f, 0, + shape0, output, 1.0f, 0); +} + +LayerTestResult ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.5f); + inputTensorInfo.SetQuantizationOffset(-3); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.5f); + outputTensorInfo.SetQuantizationOffset(-3); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + 4, 5, 6, 7 + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = input; + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 2; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(0.1567f); + inputTensorInfo.SetQuantizationOffset(1); + + armnn::TensorInfo 
outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(0.1567f); + outputTensorInfo.SetQuantizationOffset(1); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1, 255, + 200, 250 + })); + + // The 'resize bilinear' operation projects the top-left corner of output texels into the input image, + // then figures out the interpolants and weights. Note this is different to projecting the centre of the + // output texel - and thus we'll expect the output 1x1 matrix to contain, as its single element, the value + // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting + // the centre). + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, std::vector({ + 1 + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 4; + constexpr unsigned int inputHeight = 4; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = inputWidth / 2; + constexpr unsigned int outputHeight = inputHeight / 2; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(3.141592f); + inputTensorInfo.SetQuantizationOffset(3); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(3.141592f); + outputTensorInfo.SetQuantizationOffset(3); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1, 2, 3, 4, + 2, 3, 4, 5, + 3, 4, 5, 6, + 4, 5, 6, 7 + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, std::vector({ + 1, 3, + 3, 5 + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + 
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 3; + constexpr unsigned int inputHeight = 2; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 2; + constexpr unsigned int outputHeight = 1; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.5f); + inputTensorInfo.SetQuantizationOffset(-1); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.5f); + outputTensorInfo.SetQuantizationOffset(-1); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 1, 2, 3, // 3.0, 4.5, 6.0 + 5, 8, 13 // 9.0, 13.5, 21.0 + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, std::vector({ + 1, 3 // 3.0, 5.25 + })); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + constexpr unsigned int inputWidth = 2; + constexpr unsigned int inputHeight = 3; + constexpr unsigned int inputChannels = 1; + constexpr unsigned int inputBatchSize = 1; + + constexpr unsigned int outputWidth = 5; + constexpr unsigned int outputHeight = 3; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputBatchSize = inputBatchSize; + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(0.010765f); + inputTensorInfo.SetQuantizationOffset(7); + + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(0.010132f); + outputTensorInfo.SetQuantizationOffset(-18); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + 24, 228, // 0.183005, 2.379065, + 105, 128, // 1.05497, 1.302565 + 230, 71 // 2.400595, 0.68896 + })); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, std::vector({ + 0, 87, 173, 217, 217, // 0.18300501, 1.06142902, 1.93985295, 2.37906504, 2.37906504 + 86, 96, 106, 111, 111, // 1.05497003, 1.15400803, 1.25304604, 1.30256498, 1.30256498 + 219, 151, 84, 50, 50 // 2.40059495, 1.71594095, 1.03128707, 0.68896002, 0.68896002 + })); + + 
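    // Worked example for the first expected element (editorial sketch): input 24 dequantizes to
    // 0.010765f * (24 - 7) = 0.183005f; requantizing with the output parameters gives
    // 0.183005f / 0.010132f + (-18) ~= 0.06, i.e. 0. The differing input and output quantization
    // parameters are why the raw uint8 values change even where the resize copies a texel unchanged.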
std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ResizeBilinearQueueDescriptor descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateResizeBilinear(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + return result; +} + +LayerTestResult BatchNormTest(armnn::IWorkloadFactory& workloadFactory) +{ + auto ret = BatchNormTestImpl(workloadFactory, 0.f, 0); + return ret; +} + +LayerTestResult BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + auto ret = BatchNormTestImpl(workloadFactory, 1.f/20.f, 50); + return ret; +} + +LayerTestResult ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return ConstantTestImpl(workloadFactory, 2e-6f, 1); +} + +LayerTestResult Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation1dTestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim0TestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1TestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim0DiffInputDimsTestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation2dDim1DiffInputDimsTestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0TestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1TestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2TestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim0TestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim1DiffInputDimsTestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return Concatenation3dDim2DiffInputDimsTestImpl(workloadFactory, 0.5f, -1); +} + +LayerTestResult SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize2x2Stride2x2TestCommon(workloadFactory, forceNoPadding); +} + +LayerTestResult SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize2x2Stride2x2TestCommon(workloadFactory, forceNoPadding, 3.0f, -5); +} + 
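Each of the non-templated functions in this block is a thin wrapper that fixes the element type of a templated *TestImpl/*TestCommon helper and, for the Uint8 variants, supplies arbitrary quantization parameters. A hedged sketch of the pattern, with the template arguments spelled out (these are inferred from the call sites, not visible in this hunk):

// LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory)
// {
//     // 0.5f / -1 is the arbitrary scale/offset pair shared by the Uint8 concatenation wrappers above.
//     return Concatenation3dDim2TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
// }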
+LayerTestResult SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize3x3Stride2x4TestCommon(workloadFactory, forceNoPadding); +} + +LayerTestResult SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return SimpleMaxPooling2dSize3x3Stride2x4TestCommon(workloadFactory, forceNoPadding, 0.1f, 128); +} + +LayerTestResult SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleAveragePooling2dTestCommon(workloadFactory); +} + +LayerTestResult SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleAveragePooling2dTestCommon(workloadFactory, 0.5, -1); +} + +LayerTestResult IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding) +{ + return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon(workloadFactory, forceNoPadding); +} + +LayerTestResult LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return LargeTensorsAveragePooling2dTestCommon(workloadFactory); +} + +LayerTestResult LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return LargeTensorsAveragePooling2dTestCommon(workloadFactory, 0.5, -1); +} + +LayerTestResult SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleL2Pooling2dTestCommon(workloadFactory); +} + +LayerTestResult SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimpleL2Pooling2dTestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride1TestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride1TestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride3TestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride3TestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride4TestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize3Stride4TestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize7TestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize7TestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize9TestCommon(workloadFactory); +} + +LayerTestResult L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return L2Pooling2dSize9TestCommon(workloadFactory); +} + +LayerTestResult AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return AsymmetricNonSquarePooling2dTestCommon(workloadFactory); +} + +LayerTestResult AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return AsymmetricNonSquarePooling2dTestCommon(workloadFactory); +} + +LayerTestResult 
ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType) +{ + return ComparePooling2dTestCommon(workloadFactory, refWorkloadFactory, poolingType); +} + +LayerTestResult ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType) +{ + return ComparePooling2dTestCommon(workloadFactory, refWorkloadFactory, poolingType, 0.1f, 128); +} + +LayerTestResult FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory, + bool transposeWeights) +{ + return FullyConnectedLargeTestCommon(workloadFactory, transposeWeights); +} + +LayerTestResult IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleMaxPooling2dTestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleMaxPooling2dTestCommon(workloadFactory, 1.0f, -5); +} + +LayerTestResult IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingMaxPooling2dSize3TestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingMaxPooling2dSize3TestCommon(workloadFactory, 1.0f, -5); +} + +LayerTestResult IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dTestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dTestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test( + armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingAveragePooling2dSize3TestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingAveragePooling2dSize3TestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleL2Pooling2dTestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingSimpleL2Pooling2dTestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingL2Pooling2dSize3TestCommon(workloadFactory); +} + +LayerTestResult IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return IgnorePaddingL2Pooling2dSize3TestCommon(workloadFactory); +} + +LayerTestResult SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimplePermuteFloat32TestCommon(workloadFactory); +}; + +LayerTestResult SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + return SimplePermuteUint8TestCommon(workloadFactory); +}; + 
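The Uint8 variants above differ from their float counterparts only in the quantization scale and offset they forward to the shared implementation, for example 0.1f/128 for ComparePooling2dUint8Test and 1.0f/-5 for the IgnorePadding max-pooling tests. Those two numbers define the usual asymmetric mapping real = scale * (quantized - offset). A minimal, self-contained sketch of the corresponding quantize step (the helper name here is illustrative; the tests themselves rely on QuantizeHelper.hpp, which this patch also pulls in):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Illustrative only: maps a float test value into the uint8 domain used by
    // the *Uint8Test wrappers, given the (scale, offset) pair they pass down.
    inline uint8_t QuantizeForTest(float value, float scale, int32_t offset)
    {
        int32_t quantized = static_cast<int32_t>(std::round(value / scale)) + offset;
        return static_cast<uint8_t>(std::min(255, std::max(0, quantized)));  // clamp to [0, 255]
    }

For instance, with scale 0.1f and offset 128 a float value of 0.5f quantizes to 133.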
+LayerTestResult PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory) +{ + return PermuteFloat32ValueSet1TestCommon(workloadFactory); +}; + +LayerTestResult PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory) +{ + return PermuteFloat32ValueSet2TestCommon(workloadFactory); +}; + +LayerTestResult PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory) +{ + return PermuteFloat32ValueSet3TestCommon(workloadFactory); +}; \ No newline at end of file diff --git a/src/backends/test/LayerTests.hpp b/src/backends/test/LayerTests.hpp new file mode 100644 index 0000000000..365a1f53d4 --- /dev/null +++ b/src/backends/test/LayerTests.hpp @@ -0,0 +1,345 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "armnn/ArmNN.hpp" +#include "armnn/Tensor.hpp" +#include "Half.hpp" + +#include +#include +#include + +// Layer callables. + +namespace armnn +{ +class IWorkloadFactory; +} + +template +boost::array GetTensorShapeAsArray(const armnn::TensorInfo& tensorInfo) +{ + BOOST_ASSERT_MSG(n == tensorInfo.GetNumDimensions(), + "Attempting to construct a shape array of mismatching size"); + + boost::array shape; + for (unsigned int i = 0; i < n; i++) + { + shape[i] = tensorInfo.GetShape()[i]; + } + return shape; +} + +template +struct LayerTestResult +{ + LayerTestResult(const armnn::TensorInfo& outputInfo) + { + auto shape( GetTensorShapeAsArray(outputInfo) ); + output.resize(shape); + outputExpected.resize(shape); + supported = true; + } + + boost::multi_array output; + boost::multi_array outputExpected; + bool supported; +}; + +LayerTestResult SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult +Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory); + + +LayerTestResult Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); +LayerTestResult Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); + +LayerTestResult DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); + +LayerTestResult DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding ); +LayerTestResult IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult 
SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding); +LayerTestResult IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test( + armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType); +LayerTestResult ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType); + +LayerTestResult ConstantLinearActivationTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta); +LayerTestResult SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta); 
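Each declaration in this header is meant to be driven by a per-backend unit-test suite: the suite passes its own armnn::IWorkloadFactory and then checks the returned LayerTestResult's output against outputExpected. A hedged usage sketch, not part of this patch; RefWorkloadFactory, the <float, 2> template arguments and the CompareTensors helper (assumed to come from test/TensorHelpers.hpp) are assumptions, and the ArmNN includes are omitted because they depend on the backend layout:

    #include <boost/test/unit_test.hpp>

    BOOST_AUTO_TEST_CASE(SimpleSoftmaxOnReferenceBackend)
    {
        // Any backend's factory can be plugged in; the reference CPU factory is
        // assumed here purely for illustration.
        armnn::RefWorkloadFactory workloadFactory;

        LayerTestResult<float, 2> result = SimpleSoftmaxTest(workloadFactory, 1.0f /*beta*/);

        // Element-wise comparison of actual vs. expected output.
        BOOST_TEST(CompareTensors(result.output, result.outputExpected));
    }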
+ +LayerTestResult SimpleSigmoidTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SimpleReshapeFloat32Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SimpleReshapeUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SimpleFloorTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SimpleSigmoidUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +template +LayerTestResult CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +LayerTestResult CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod); + +LayerTestResult CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, float beta); + +LayerTestResult FullyConnectedFloat32Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled, + bool transposeWeights); + +std::vector> SplitterTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult MergerTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult AdditionTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +LayerTestResult SubtractionTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SubtractionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SubtractionBroadcastTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult CompareActivationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f, + unsigned int batchSize); + +LayerTestResult DivisionTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult DivisionByZeroTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult DivisionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult 
DivisionBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult MultiplicationTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult MultiplicationBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult MultiplicationBroadcast1DVectorTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +LayerTestResult BatchNormTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory); + +LayerTestResult BoundedReLuUpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult BoundedReLuUint8UpperAndLowerBoundTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult CompareBoundedReLuTest(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float upperBound, + float lowerBound); + +// Tests that the output should be identical to the input when the output dimensions match the input ones. +LayerTestResult ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory); + +// Tests the behaviour of the resize bilinear operation when rescaling a 2x2 image into a 1x1 image. +LayerTestResult SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory); + +// Tests the resize bilinear for minification of a square input matrix (also: input dimensions are a +// multiple of output dimensions). +LayerTestResult ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory); + +// Tests the resize bilinear for minification (output dimensions smaller than input dimensions). +LayerTestResult ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory); + +// Tests the resize bilinear for magnification (output dimensions bigger than input dimensions). 
+LayerTestResult ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult BatchNormTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult ConstantTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult BoundedReLuUint8Test(armnn::IWorkloadFactory& workloadFactory, float upperBound); +LayerTestResult BoundedReLuUint8Test(armnn::IWorkloadFactory& workloadFactory, + float upperBound, + float lowerBound); + +LayerTestResult FullyConnectedUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled); + +std::vector> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult MergerUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SubtractionUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SubtractionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SubtractionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult CompareActivationUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::ActivationFunction f); + +LayerTestResult CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta); + +LayerTestResult MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult MultiplicationBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult MultiplicationBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult DivisionUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult DivisionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult DivisionBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory, + bool biasEnabled); + +LayerTestResult ConstantLinearActivationUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult 
ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory); + + +LayerTestResult FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory, + bool transposeWeights); +LayerTestResult SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult PermuteFloat32ValueSet1Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult PermuteFloat32ValueSet2Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult PermuteFloat32ValueSet3Test(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest + (armnn::IWorkloadFactory& workloadFactory); +LayerTestResult + LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult +LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(armnn::IWorkloadFactory& workloadFactory); + +LayerTestResult SimpleConvertFp16ToFp32Test(armnn::IWorkloadFactory& workloadFactory); +LayerTestResult SimpleConvertFp32ToFp16Test(armnn::IWorkloadFactory& workloadFactory); diff --git a/src/backends/test/LstmTestImpl.hpp b/src/backends/test/LstmTestImpl.hpp new file mode 100644 index 0000000000..2c4e166084 --- /dev/null +++ b/src/backends/test/LstmTestImpl.hpp @@ -0,0 +1,1150 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include +#include "backends/WorkloadFactory.hpp" + +LayerTestResult LstmNoCifgNoPeepholeNoProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array& input, + const boost::multi_array& outputExpected) +{ + unsigned int batchSize = boost::numeric_cast(input.shape()[0]); + unsigned int inputSize = boost::numeric_cast(input.shape()[1]); + unsigned int outputSize = boost::numeric_cast(outputExpected.shape()[1]); + // cellSize and outputSize have the same size when there is no projection. 
+ unsigned numUnits = outputSize; + + + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, armnn::GetDataType()); + armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, armnn::GetDataType()); + armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, armnn::GetDataType()); + + + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 3}, armnn::GetDataType()); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, armnn::GetDataType()); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + + + LayerTestResult ret(outputTensorInfo); + + std::vector inputVector; + inputVector.assign(input.data(), input.data() + (batchSize * inputSize)); + auto inputTensor = MakeTensor(inputTensorInfo, inputVector); + + std::vector cellStateInVector(batchSize * numUnits, 0.f); + auto cellStateInTensor = MakeTensor(cellStateInTensorInfo, cellStateInVector); + + std::vector outputStateInVector(batchSize * outputSize, 0.f); + auto outputStateInTensor = MakeTensor(outputStateInTensorInfo, outputStateInVector); + + std::vector scratchBufferVector(batchSize * numUnits * 3, 0.f); + auto scratchBufferTensor = MakeTensor(scratchBufferTensorInfo, scratchBufferVector); + + std::vector outputStateOutVector(batchSize * outputSize, 0.f); + auto outputStateOutTensor = MakeTensor(outputStateOutTensorInfo, outputStateOutVector); + + std::vector cellStateOutVector(batchSize * numUnits, 0.f); + auto cellStateOutTensor = MakeTensor(cellStateOutTensorInfo, cellStateOutVector); + + std::vector outputVector; + outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize)); + ret.outputExpected = MakeTensor(outputTensorInfo, outputVector); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr cellStateInHandle = + workloadFactory.CreateTensorHandle(cellStateInTensorInfo); + std::unique_ptr outputStateInHandle = + workloadFactory.CreateTensorHandle(outputStateInTensorInfo); + + std::unique_ptr scratchHandle = workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); + std::unique_ptr outputStateOutHandle = + workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); + std::unique_ptr cellStateOutHandle = + workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + + armnn::LstmQueueDescriptor data; + armnn::WorkloadInfo info; + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); + AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get()); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::TensorInfo tensorInfo4({numUnits}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo8({numUnits, 2}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo16({numUnits, 4}, armnn::GetDataType()); + + auto inputToInputWeights = MakeTensor(tensorInfo8, {-0.45018822f, -0.02338299f, -0.0870589f, + -0.34550029f, 0.04266912f, -0.15680569f, + -0.34856534f, 0.43890524f}); + + auto 
inputToForgetWeights = MakeTensor(tensorInfo8, {0.09701663f, 0.20334584f, -0.50592935f, + -0.31343272f, -0.40032279f, 0.44781327f, + 0.01387155f, -0.35593212f}); + + auto inputToCellWeights = MakeTensor(tensorInfo8, {-0.50013041f, 0.1370284f, 0.11810488f, 0.2013163f, + -0.20583314f, 0.44344562f, 0.22077113f, + -0.29909778f}); + + auto inputToOutputWeights = MakeTensor(tensorInfo8, {-0.25065863f, -0.28290087f, 0.04613829f, + 0.40525138f, 0.44272184f, 0.03897077f, + -0.1556896f, 0.19487578f}); + + auto recurrentToInputWeights = MakeTensor(tensorInfo16, {-0.0063535f, -0.2042388f, 0.31454784f, + -0.35746509f, 0.28902304f, 0.08183324f, + -0.16555229f, 0.02286911f, -0.13566875f, + 0.03034258f, 0.48091322f, -0.12528998f, + 0.24077177f, -0.51332325f, -0.33502164f, + 0.10629296f}); + + auto recurrentToForgetWeights = MakeTensor(tensorInfo16, {-0.48684245f, -0.06655136f, 0.42224967f, + 0.2112639f, 0.27654213f, 0.20864892f, + -0.07646349f, 0.45877004f, 0.00141793f, + -0.14609534f, 0.36447752f, 0.09196436f, + 0.28053468f, 0.01560611f, -0.20127171f, + -0.01140004f}); + + auto recurrentToCellWeights = MakeTensor(tensorInfo16, {-0.3407414f, 0.24443203f, -0.2078532f, + 0.26320225f, 0.05695659f, -0.00123841f, + -0.4744786f, -0.35869038f, -0.06418842f, + -0.13502428f, -0.501764f, 0.22830659f, + -0.46367589f, 0.26016325f, -0.03894562f, + -0.16368064f}); + + auto recurrentToOutputWeights = MakeTensor(tensorInfo16, {0.43385774f, -0.17194885f, 0.2718237f, + 0.09215671f, 0.24107647f, -0.39835793f, + 0.18212086f, 0.01301402f, 0.48572797f, + -0.50656658f, 0.20047462f, -0.20607421f, + -0.51818722f, -0.15390486f, 0.0468148f, + 0.39922136f}); + + auto cellToInputWeights = MakeTensor(tensorInfo4, {0., 0., 0., 0.}); + + auto inputGateBias = MakeTensor(tensorInfo4, {0., 0., 0., 0.}); + + auto forgetGateBias = MakeTensor(tensorInfo4, {1., 1., 1., 1.}); + + auto cellBias = MakeTensor(tensorInfo4, {0., 0., 0., 0.}); + + auto outputGateBias = MakeTensor(tensorInfo4, {0., 0., 0., 0.}); + + armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo8); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo8); + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo8); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo8); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo16); + armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo16); + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo16); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo16); + armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4); + armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4); + + AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); + 
AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]); + AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); + AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); + + data.m_InputToInputWeights = &inputToInputWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + data.m_CellToInputWeights = &cellToInputWeightsTensor; + data.m_InputGateBias = &inputGateBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_CellBias = &cellBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + + + // Flags to set test configuration + data.m_Parameters.m_ActivationFunc = 4; + data.m_Parameters.m_CifgEnabled = false; + data.m_Parameters.m_PeepholeEnabled = false; + data.m_Parameters.m_ProjectionEnabled = false; + + + std::unique_ptr workload = workloadFactory.CreateLstm(data, info); + inputHandle->Allocate(); + outputStateInHandle->Allocate(); + cellStateInHandle->Allocate(); + + scratchHandle->Allocate(); + outputStateOutHandle->Allocate(); + cellStateOutHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); + CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); + CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + return ret; +} + + +LayerTestResult +LstmLayerFloat32NoCifgWithPeepholeWithProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array& input, + const boost::multi_array& outputExpected) { + + unsigned int batchSize = 2; + unsigned int outputSize = 16; + unsigned int inputSize = 5; + unsigned numUnits = 20; + + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, armnn::GetDataType()); + armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, armnn::GetDataType()); + armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, armnn::GetDataType()); + + // Scratch buffer size without CIFG [batchSize, numUnits * 3] + armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 3}, armnn::GetDataType()); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, armnn::GetDataType()); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + + LayerTestResult ret(outputTensorInfo); + + std::vector inputVector; + inputVector.assign(input.data(), input.data() + (batchSize * inputSize)); + auto inputTensor = MakeTensor(inputTensorInfo, inputVector); + + std::vector cellStateInVector(batchSize * numUnits, 0.f); + auto 
cellStateInTensor = MakeTensor(cellStateInTensorInfo, cellStateInVector); + + std::vector outputStateInVector(batchSize * outputSize, 0.f); + auto outputStateInTensor = MakeTensor(outputStateInTensorInfo, outputStateInVector); + + std::vector scratchBufferVector(batchSize * numUnits * 3, 0.f); + auto scratchBufferTensor = MakeTensor(scratchBufferTensorInfo, scratchBufferVector); + + std::vector outputStateOutVector(batchSize * outputSize, 0.f); + auto outputStateOutTensor = MakeTensor(outputStateOutTensorInfo, outputStateOutVector); + + std::vector cellStateOutVector(batchSize * numUnits, 0.f); + auto cellStateOutTensor = MakeTensor(cellStateOutTensorInfo, cellStateOutVector); + + std::vector outputVector; + outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize)); + ret.outputExpected = MakeTensor(outputTensorInfo, outputVector); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr cellStateInHandle = + workloadFactory.CreateTensorHandle(cellStateInTensorInfo); + std::unique_ptr outputStateInHandle = + workloadFactory.CreateTensorHandle(outputStateInTensorInfo); + + std::unique_ptr scratchHandle = workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); + std::unique_ptr outputStateOutHandle = + workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); + std::unique_ptr cellStateOutHandle = + workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::LstmQueueDescriptor data; + armnn::WorkloadInfo info; + + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); + AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get()); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + armnn::TensorInfo tensorInfo16({outputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo20({numUnits}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo20x5({numUnits, inputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo20x16({numUnits, outputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfo16x20({outputSize, numUnits}, armnn::GetDataType()); + + auto inputToInputWeights = + MakeTensor(tensorInfo20x5, {0.021393683f,0.06124551f, 0.046905167f,-0.014657677f,-0.03149463f, + 0.09171803f, 0.14647801f,0.10797193f, -0.0057968358f,0.0019193048f, + -0.2726754f, 0.10154029f, -0.018539885f, 0.080349885f, -0.10262385f, + -0.022599787f,-0.09121155f, -0.008675967f, -0.045206103f,-0.0821282f, + -0.008045952f,0.015478081f, 0.055217247f, 0.038719587f, 0.044153627f, + -0.06453243f,0.05031825f, -0.046935108f, -0.008164439f, 0.014574226f, + -0.1671009f, -0.15519552f, -0.16819797f,-0.13971269f,-0.11953059f, + 0.25005487f, -0.22790983f, 0.009855087f, -0.028140958f, -0.11200698f, + 0.11295408f, -0.0035217577f, 0.054485075f, 0.05184695f, 0.064711206f, + 0.10989193f, 0.11674786f, 0.03490607f, 0.07727357f, 0.11390585f, + -0.1863375f, -0.1034451f, -0.13945189f, -0.049401227f, -0.18767063f, + 0.042483903f, 0.14233552f, 0.13832581f, 0.18350165f, 0.14545603f, + 
-0.028545704f,0.024939531f,0.050929718f,0.0076203286f,-0.0029723682f, + -0.042484224f, -0.11827596f, -0.09171104f, -0.10808628f,-0.16327988f, + -0.2273378f, -0.0993647f, -0.017155107f,0.0023917493f,0.049272764f, + 0.0038534778f, 0.054764505f, 0.089753784f, 0.06947234f, 0.08014476f, + -0.04544234f, -0.0497073f,-0.07135631f, -0.048929106f,-0.004042012f, + -0.009284026f, 0.018042054f, 0.0036860977f,-0.07427302f, -0.11434604f, + -0.018995456f, 0.031487543f, 0.012834908f,0.019977754f,0.044256654f, + -0.39292613f, -0.18519334f, -0.11651281f,-0.06809892f, 0.011373677f + }); + + auto inputToForgetWeights = + MakeTensor(tensorInfo20x5, {-0.0018401089f, -0.004852237f,0.03698424f, 0.014181704f,0.028273236f, + -0.016726194f, -0.05249759f,-0.10204261f, 0.00861066f,-0.040979505f, + -0.009899187f,0.01923892f,-0.028177269f, -0.08535103f,-0.14585495f, + 0.10662567f,-0.01909731f,-0.017883534f,-0.0047269356f,-0.045103323f, + 0.0030784295f,0.076784775f,0.07463696f, 0.094531395f,0.0814421f, + -0.12257899f, -0.033945758f,-0.031303465f, 0.045630626f,0.06843887f, + -0.13492945f, -0.012480007f,-0.0811829f, -0.07224499f,-0.09628791f, + 0.045100946f,0.0012300825f, 0.013964662f, 0.099372394f,0.02543059f, + 0.06958324f, 0.034257296f, 0.0482646f, 0.06267997f,0.052625068f, + 0.12784666f, 0.07077897f, 0.025725935f, 0.04165009f,0.07241905f, + 0.018668644f, -0.037377294f,-0.06277783f,-0.08833636f,-0.040120605f, + -0.011405586f,-0.007808335f,-0.010301386f,-0.005102167f,0.027717464f, + 0.05483423f, 0.11449111f, 0.11289652f,0.10939839f, 0.13396506f, + -0.08402166f,-0.01901462f, -0.044678304f,-0.07720565f,0.014350063f, + -0.11757958f, -0.0652038f, -0.08185733f,-0.076754324f,-0.092614375f, + 0.10405491f, 0.052960336f, 0.035755895f,0.035839386f,-0.012540553f, + 0.036881298f, 0.02913376f, 0.03420159f,0.05448447f,-0.054523353f, + 0.02582715f, 0.02327355f, -0.011857179f,-0.0011980024f,-0.034641717f, + -0.026125094f,-0.17582615f,-0.15923657f,-0.27486774f,-0.0006143371f, + 0.0001771948f, -8.470171e-05f, 0.02651807f,0.045790765f,0.06956496f + }); + + auto inputToCellWeights = + MakeTensor(tensorInfo20x5, {-0.04580283f, -0.09549462f, -0.032418985f, -0.06454633f, + -0.043528453f, 0.043018587f, -0.049152344f, -0.12418144f, + -0.078985475f, -0.07596889f, 0.019484362f, -0.11434962f, + -0.0074034138f, -0.06314844f, -0.092981495f, 0.0062155537f, + -0.025034338f, -0.0028890965f, 0.048929527f, 0.06235075f, + 0.10665918f, -0.032036792f, -0.08505916f, -0.10843358f, + -0.13002433f, -0.036816437f, -0.02130134f, -0.016518239f, + 0.0047691227f, -0.0025825808f, 0.066017866f, 0.029991534f, + -0.10652836f, -0.1037554f, -0.13056071f, -0.03266643f, + -0.033702414f, -0.006473424f, -0.04611692f, 0.014419339f, + -0.025174323f, 0.0396852f, 0.081777506f, 0.06157468f, + 0.10210095f, -0.009658194f, 0.046511717f, 0.03603906f, + 0.0069369148f, 0.015960095f, -0.06507666f, 0.09551598f, + 0.053568836f, 0.06408714f, 0.12835667f, -0.008714329f, + -0.20211966f, -0.12093674f, 0.029450472f, 0.2849013f, + -0.029227901f, 0.1164364f, -0.08560263f, 0.09941786f, + -0.036999565f, -0.028842626f, -0.0033637602f, -0.017012902f, + -0.09720865f, -0.11193351f, -0.029155117f, -0.017936034f, + -0.009768936f, -0.04223324f, -0.036159635f, 0.06505112f, + -0.021742892f, -0.023377212f, -0.07221364f, -0.06430552f, + 0.05453865f, 0.091149814f, 0.06387331f, 0.007518393f, + 0.055960953f, 0.069779344f, 0.046411168f, 0.10509911f, + 0.07463894f, 0.0075130584f, 0.012850982f, 0.04555431f, + 0.056955688f, 0.06555285f, 0.050801456f, -0.009862683f, + 0.00826772f, -0.026555609f, -0.0073611983f, 
-0.0014897042f + }); + + auto inputToOutputWeights = + MakeTensor(tensorInfo20x5, {-0.0998932f, -0.07201956f, -0.052803773f,-0.15629593f,-0.15001918f, + -0.07650751f,0.02359855f, -0.075155355f, -0.08037709f, -0.15093534f, + 0.029517552f, -0.04751393f, 0.010350531f,-0.02664851f, -0.016839722f, + -0.023121163f, 0.0077019283f, 0.012851257f, -0.05040649f,-0.0129761f, + -0.021737747f,-0.038305793f,-0.06870586f, -0.01481247f,-0.001285394f, + 0.10124236f, 0.083122835f, 0.053313006f,-0.062235646f,-0.075637154f, + -0.027833903f, 0.029774971f, 0.1130802f, 0.09218906f, 0.09506135f, + -0.086665764f,-0.037162706f,-0.038880914f,-0.035832845f,-0.014481564f, + -0.09825003f,-0.12048569f,-0.097665586f,-0.05287633f, -0.0964047f, + -0.11366429f, 0.035777505f, 0.13568819f, 0.052451383f,0.050649304f, + 0.05798951f, -0.021852335f,-0.099848844f,0.014740475f,-0.078897946f, + 0.04974699f, 0.014160473f, 0.06973932f, 0.04964942f, 0.033364646f, + 0.08190124f, 0.025535367f, 0.050893165f, 0.048514254f,0.06945813f, + -0.078907564f,-0.06707616f, -0.11844508f, -0.09986688f,-0.07509403f, + 0.06263226f, 0.14925587f, 0.20188436f, 0.12098451f,0.14639415f, + 0.0015017595f, -0.014267382f, -0.03417257f,0.012711468f,0.0028300495f, + -0.024758482f, -0.05098548f,-0.0821182f, 0.014225672f, 0.021544158f, + 0.08949725f, 0.07505268f, -0.0020780868f, 0.04908258f,0.06476295f, + -0.022907063f,0.027562456f,0.040185735f, 0.019567577f,-0.015598739f, + -0.049097303f, -0.017121866f, -0.083368234f,-0.02332002f,-0.0840956f + }); + + auto inputGateBias = + MakeTensor(tensorInfo20, {0.02234832f, 0.14757581f, 0.18176508f, 0.10380666f, 0.053110216f, + -0.06928846f, -0.13942584f, -0.11816189f, 0.19483899f, 0.03652339f, + -0.10250295f, 0.036714908f, -0.18426876f, 0.036065217f, 0.21810818f, + 0.02383196f, -0.043370757f, 0.08690144f, -0.04444982f, 0.00030581196f + }); + + auto forgetGateBias = + MakeTensor(tensorInfo20, {0.035185695f, -0.042891346f, -0.03032477f, 0.23027696f, + 0.11098921f, 0.15378423f, 0.09263801f, 0.09790885f, + 0.09508917f, 0.061199076f, 0.07665568f, -0.015443159f, + -0.03499149f, 0.046190713f, 0.08895977f, 0.10899629f, + 0.40694186f, 0.06030037f, 0.012413437f, -0.06108739f + }); + + auto cellBias = + MakeTensor(tensorInfo20, {-0.024379363f, 0.0055531194f, 0.23377132f, 0.033463873f, + -0.1483596f, -0.10639995f, -0.091433935f, 0.058573797f, + -0.06809782f, -0.07889636f, -0.043246906f, -0.09829136f, + -0.4279842f, 0.034901652f, 0.18797937f, 0.0075234566f, + 0.016178843f, 0.1749513f, 0.13975595f, 0.92058027f + }); + + auto outputGateBias = + MakeTensor(tensorInfo20, {0.046159424f, -0.0012809046f, 0.03563469f, 0.12648113f, 0.027195795f, + 0.35373217f, -0.018957434f, 0.008907322f, -0.0762701f, 0.12018895f, + 0.04216877f, 0.0022856654f, 0.040952638f, 0.3147856f, 0.08225149f, + -0.057416286f, -0.14995944f, -0.008040261f, 0.13208859f, 0.029760877f + }); + + auto recurrentToInputWeights = + MakeTensor(tensorInfo20x16, {-0.001374326f, -0.078856036f, 0.10672688f, 0.029162422f, + -0.11585556f, 0.02557986f, -0.13446963f, -0.035785314f, + -0.01244275f, 0.025961924f, -0.02337298f, -0.044228926f, + -0.055839065f, -0.046598054f, -0.010546039f, -0.06900766f, + 0.027239809f, 0.022582639f, -0.013296484f, -0.05459212f, + 0.08981f, -0.045407712f, 0.08682226f, -0.06867011f, + -0.14390695f, -0.02916037f, 0.000996957f, 0.091420636f, + 0.14283475f, -0.07390571f, -0.06402044f, 0.062524505f, + -0.093129106f, 0.04860203f, -0.08364217f, -0.08119002f, + 0.009352075f, 0.22920375f, 0.0016303885f, 0.11583097f, + -0.13732095f, 0.012405723f, -0.07551853f, 0.06343048f, + 
0.12162708f, -0.031923793f, -0.014335606f, 0.01790974f, + -0.10650317f, -0.0724401f, 0.08554849f, -0.05727212f, + 0.06556731f, -0.042729504f, -0.043227166f, 0.011683251f, + -0.013082158f, -0.029302018f, -0.010899579f, -0.062036745f, + -0.022509435f, -0.00964907f, -0.01567329f, 0.04260106f, + -0.07787477f, -0.11576462f, 0.017356863f, 0.048673786f, + -0.017577527f, -0.05527947f, -0.082487635f, -0.040137455f, + -0.10820036f, -0.04666372f, 0.022746278f, -0.07851417f, + 0.01068115f, 0.032956902f, 0.022433773f, 0.0026891115f, + 0.08944216f, -0.0685835f, 0.010513544f, 0.07228705f, + 0.02032331f, -0.059686817f, -0.0005566496f, -0.086984694f, + 0.040414046f, -0.1380399f, 0.094208956f, -0.05722982f, + 0.012092817f, -0.04989123f, -0.086576f, -0.003399834f, + -0.04696032f, -0.045747425f, 0.10091314f, 0.048676282f, + -0.029037097f, 0.031399418f, -0.0040285117f, 0.047237843f, + 0.09504992f, 0.041799378f, -0.049185462f, -0.031518843f, + -0.10516937f, 0.026374253f, 0.10058866f, -0.0033195973f, + -0.041975245f, 0.0073591834f, 0.0033782164f, -0.004325073f, + -0.10167381f, 0.042500053f, -0.01447153f, 0.06464186f, + -0.017142897f, 0.03312627f, 0.009205989f, 0.024138335f, + -0.011337001f, 0.035530265f, -0.010912711f, 0.0706555f, + -0.005894094f, 0.051841937f, -0.1401738f, -0.02351249f, + 0.0365468f, 0.07590991f, 0.08838724f, 0.021681072f, + -0.10086113f, 0.019608743f, -0.06195883f, 0.077335775f, + 0.023646897f, -0.095322326f, 0.02233014f, 0.09756986f, + -0.048691444f, -0.009579111f, 0.07595467f, 0.11480546f, + -0.09801813f, 0.019894179f, 0.08502348f, 0.004032281f, + 0.037211012f, 0.068537936f, -0.048005626f, -0.091520436f, + -0.028379958f, -0.01556313f, 0.06554592f, -0.045599163f, + -0.01672207f, -0.020169014f, -0.011877351f, -0.20212261f, + 0.010889619f, 0.0047078193f, 0.038385306f, 0.08540671f, + -0.017140968f, -0.0035865551f, 0.016678626f, 0.005633034f, + 0.015963363f, 0.00871737f, 0.060130805f, 0.028611384f, + 0.10109069f, -0.015060172f, -0.07894427f, 0.06401885f, + 0.011584063f, -0.024466386f, 0.0047652307f, -0.09041358f, + 0.030737216f, -0.0046374933f, 0.14215417f, -0.11823516f, + 0.019899689f, 0.006106124f, -0.027092824f, 0.0786356f, + 0.05052217f, -0.058925f, -0.011402121f, -0.024987547f, + -0.0013661642f, -0.06832946f, -0.015667673f, -0.1083353f, + -0.00096863037f, -0.06988685f, -0.053350925f, -0.027275559f, + -0.033664223f, -0.07978348f, -0.025200296f, -0.017207067f, + -0.058403496f, -0.055697463f, 0.005798788f, 0.12965427f, + -0.062582195f, 0.0013350133f, -0.10482091f, 0.0379771f, + 0.072521195f, -0.0029455067f, -0.13797039f, -0.03628521f, + 0.013806405f, -0.017858358f, -0.01008298f, -0.07700066f, + -0.017081132f, 0.019358726f, 0.0027079724f, 0.004635139f, + 0.062634714f, -0.02338735f, -0.039547626f, -0.02050681f, + 0.03385117f, -0.083611414f, 0.002862572f, -0.09421313f, + 0.058618143f, -0.08598433f, 0.00972939f, 0.023867095f, + -0.053934585f, -0.023203006f, 0.07452513f, -0.048767887f, + -0.07314807f, -0.056307215f, -0.10433547f, -0.06440842f, + 0.04328182f, 0.04389765f, -0.020006588f, -0.09076438f, + -0.11652589f, -0.021705797f, 0.03345259f, -0.010329105f, + -0.025767034f, 0.013057034f, -0.07316461f, -0.10145612f, + 0.06358255f, 0.18531723f, 0.07759293f, 0.12006465f, + 0.1305557f, 0.058638252f, -0.03393652f, 0.09622831f, + -0.16253184f, -2.4580743e-06f, 0.079869635f, -0.070196845f, + -0.005644518f, 0.06857898f, -0.12598175f, -0.035084512f, + 0.03156317f, -0.12794146f, -0.031963028f, 0.04692781f, + 0.030070418f, 0.0071660685f, -0.095516115f, -0.004643372f, + 0.040170413f, -0.062104587f, 
-0.0037324072f, 0.0554317f, + 0.08184801f, -0.019164372f, 0.06791302f, 0.034257166f, + -0.10307039f, 0.021943003f, 0.046745934f, 0.0790918f, + -0.0265588f, -0.007824208f, 0.042546265f, -0.00977924f, + -0.0002440307f, -0.017384544f, -0.017990116f, 0.12252321f, + -0.014512694f, -0.08251313f, 0.08861942f, 0.13589665f, + 0.026351685f, 0.012641483f, 0.07466548f, 0.044301085f, + -0.045414884f, -0.051112458f, 0.03444247f, -0.08502782f, + -0.04106223f, -0.028126027f, 0.028473156f, 0.10467447f + }); + + auto recurrentToForgetWeights = + MakeTensor(tensorInfo20x16, {-0.057784554f, -0.026057621f, -0.068447545f, -0.022581743f, + 0.14811787f, 0.10826372f, 0.09471067f, 0.03987225f, + -0.0039523416f, 0.00030638507f, 0.053185795f, 0.10572994f, + 0.08414449f, -0.022036452f, -0.00066928595f, -0.09203576f, + 0.032950465f, -0.10985798f, -0.023809856f, 0.0021431844f, + -0.02196096f, -0.00326074f, 0.00058621005f, -0.074678116f, + -0.06193199f, 0.055729095f, 0.03736828f, 0.020123724f, + 0.061878487f, -0.04729229f, 0.034919553f, -0.07585433f, + -0.04421272f, -0.044019096f, 0.085488975f, 0.04058006f, + -0.06890133f, -0.030951202f, -0.024628663f, -0.07672815f, + 0.034293607f, 0.08556707f, -0.05293577f, -0.033561368f, + -0.04899627f, 0.0241671f, 0.015736353f, -0.095442444f, + -0.029564252f, 0.016493602f, -0.035026584f, 0.022337519f, + -0.026871363f, 0.004780428f, 0.0077918363f, -0.03601621f, + 0.016435321f, -0.03263031f, -0.09543275f, -0.047392778f, + 0.013454138f, 0.028934088f, 0.01685226f, -0.086110644f, + -0.046250615f, -0.01847454f, 0.047608484f, 0.07339695f, + 0.034546845f, -0.04881143f, 0.009128804f, -0.08802852f, + 0.03761666f, 0.008096139f, -0.014454086f, 0.014361001f, + -0.023502491f, -0.0011840804f, -0.07607001f, 0.001856849f, + -0.06509276f, -0.006021153f, -0.08570962f, -0.1451793f, + 0.060212336f, 0.055259194f, 0.06974018f, 0.049454916f, + -0.027794661f, -0.08077226f, -0.016179763f, 0.1169753f, + 0.17213494f, -0.0056326236f, -0.053934924f, -0.0124349f, + -0.11520337f, 0.05409887f, 0.088759385f, 0.0019655675f, + 0.0042065294f, 0.03881498f, 0.019844765f, 0.041858196f, + -0.05695512f, 0.047233116f, 0.038937137f, -0.06542224f, + 0.014429736f, -0.09719407f, 0.13908425f, -0.05379757f, + 0.012321099f, 0.082840554f, -0.029899208f, 0.044217527f, + 0.059855383f, 0.07711018f, -0.045319796f, 0.0948846f, + -0.011724666f, -0.0033288454f, -0.033542685f, -0.04764985f, + -0.13873616f, 0.040668588f, 0.034832682f, -0.015319203f, + -0.018715994f, 0.046002675f, 0.0599172f, -0.043107376f, + 0.0294216f, -0.002314414f, -0.022424703f, 0.0030315618f, + 0.0014641669f, 0.0029166266f, -0.11878115f, 0.013738511f, + 0.12375372f, -0.0006038222f, 0.029104086f, 0.087442465f, + 0.052958444f, 0.07558703f, 0.04817258f, 0.044462286f, + -0.015213451f, -0.08783778f, -0.0561384f, -0.003008196f, + 0.047060397f, -0.002058388f, 0.03429439f, -0.018839769f, + 0.024734668f, 0.024614193f, -0.042046934f, 0.09597743f, + -0.0043254104f, 0.04320769f, 0.0064070094f, -0.0019131786f, + -0.02558259f, -0.022822596f, -0.023273505f, -0.02464396f, + -0.10991725f, -0.006240552f, 0.0074488563f, 0.024044557f, + 0.04383914f, -0.046476185f, 0.028658995f, 0.060410924f, + 0.050786525f, 0.009452605f, -0.0073054377f, -0.024810238f, + 0.0052906186f, 0.0066939713f, -0.0020913032f, 0.014515517f, + 0.015898481f, 0.021362653f, -0.030262267f, 0.016587038f, + -0.011442813f, 0.041154444f, -0.007631438f, -0.03423484f, + -0.010977775f, 0.036152758f, 0.0066366293f, 0.11915515f, + 0.02318443f, -0.041350313f, 0.021485701f, -0.10906167f, + -0.028218046f, -0.00954771f, 0.020531068f, 
-0.11995105f, + -0.03672871f, 0.024019798f, 0.014255957f, -0.05221243f, + -0.00661567f, -0.04630967f, 0.033188973f, 0.10107534f, + -0.014027541f, 0.030796422f, -0.10270911f, -0.035999842f, + 0.15443139f, 0.07684145f, 0.036571592f, -0.035900835f, + -0.0034699554f, 0.06209149f, 0.015920248f, -0.031122351f, + -0.03858649f, 0.01849943f, 0.13872518f, 0.01503974f, + 0.069941424f, -0.06948533f, -0.0088794185f, 0.061282158f, + -0.047401894f, 0.03100163f, -0.041533746f, -0.10430945f, + 0.044574402f, -0.01425562f, -0.024290353f, 0.034563623f, + 0.05866852f, 0.023947537f, -0.09445152f, 0.035450947f, + 0.02247216f, -0.0042998926f, 0.061146557f, -0.10250651f, + 0.020881841f, -0.06747029f, 0.10062043f, -0.0023941975f, + 0.03532124f, -0.016341697f, 0.09685456f, -0.016764693f, + 0.051808182f, 0.05875331f, -0.04536488f, 0.001626336f, + -0.028892258f, -0.01048663f, -0.009793449f, -0.017093895f, + 0.010987891f, 0.02357273f, -0.00010856845f, 0.0099760275f, + -0.001845119f, -0.03551521f, 0.0018358806f, 0.05763657f, + -0.01769146f, 0.040995963f, 0.02235177f, -0.060430344f, + 0.11475477f, -0.023854522f, 0.10071741f, 0.0686208f, + -0.014250481f, 0.034261297f, 0.047418304f, 0.08562733f, + -0.030519066f, 0.0060542435f, 0.014653856f, -0.038836084f, + 0.04096551f, 0.032249358f, -0.08355519f, -0.026823482f, + 0.056386515f, -0.010401743f, -0.028396193f, 0.08507674f, + 0.014410365f, 0.020995233f, 0.17040324f, 0.11511526f, + 0.02459721f, 0.0066619175f, 0.025853224f, -0.023133837f, + -0.081302024f, 0.017264642f, -0.009585969f, 0.09491168f, + -0.051313367f, 0.054532815f, -0.014298593f, 0.10657464f, + 0.007076659f, 0.10964551f, 0.0409152f, 0.008275321f, + -0.07283536f, 0.07937492f, 0.04192024f, -0.1075027f + }); + + auto recurrentToCellWeights = + MakeTensor(tensorInfo20x16, {-0.037322544f, 0.018592842f, 0.0056175636f, -0.06253426f, + 0.055647098f, -0.05713207f, -0.05626563f, 0.005559383f, + 0.03375411f, -0.025757805f, -0.088049285f, 0.06017052f, + -0.06570978f, 0.007384076f, 0.035123326f, -0.07920549f, + 0.053676967f, 0.044480428f, -0.07663568f, 0.0071805613f, + 0.08089997f, 0.05143358f, 0.038261272f, 0.03339287f, + -0.027673481f, 0.044746667f, 0.028349208f, 0.020090483f, + -0.019443132f, -0.030755889f, -0.0040000007f, 0.04465846f, + -0.021585021f, 0.0031670958f, 0.0053199246f, -0.056117613f, + -0.10893326f, 0.076739706f, -0.08509834f, -0.027997585f, + 0.037871376f, 0.01449768f, -0.09002357f, -0.06111149f, + -0.046195522f, 0.0422062f, -0.005683705f, -0.1253618f, + -0.012925729f, -0.04890792f, 0.06985068f, 0.037654128f, + 0.03398274f, -0.004781977f, 0.007032333f, -0.031787455f, + 0.010868644f, -0.031489216f, 0.09525667f, 0.013939797f, + 0.0058680447f, 0.0167067f, 0.02668468f, -0.04797466f, + -0.048885044f, -0.12722108f, 0.035304096f, 0.06554885f, + 0.00972396f, -0.039238118f, -0.05159735f, -0.11329045f, + 0.1613692f, -0.03750952f, 0.06529313f, -0.071974665f, + -0.11769596f, 0.015524369f, -0.0013754242f, -0.12446318f, + 0.02786344f, -0.014179351f, 0.005264273f, 0.14376344f, + 0.015983658f, 0.03406988f, -0.06939408f, 0.040699873f, + 0.02111075f, 0.09669095f, 0.041345075f, -0.08316494f, + -0.07684199f, -0.045768797f, 0.032298047f, -0.041805092f, + 0.0119405f, 0.0061010392f, 0.12652606f, 0.0064572375f, + -0.024950314f, 0.11574242f, 0.04508852f, -0.04335324f, + 0.06760663f, -0.027437469f, 0.07216407f, 0.06977076f, + -0.05438599f, 0.034033038f, -0.028602652f, 0.05346137f, + 0.043184172f, -0.037189785f, 0.10420091f, 0.00882477f, + -0.054019816f, -0.074273005f, -0.030617684f, -0.0028467078f, + 0.024302477f, -0.0038869337f, 
0.005332455f, 0.0013399826f, + 0.04361412f, -0.007001822f, 0.09631092f, -0.06702025f, + -0.042049985f, -0.035070654f, -0.04103342f, -0.10273396f, + 0.0544271f, 0.037184782f, -0.13150354f, -0.0058036847f, + -0.008264958f, 0.042035464f, 0.05891794f, 0.029673764f, + 0.0063542654f, 0.044788733f, 0.054816857f, 0.062257513f, + -0.00093483756f, 0.048938446f, -0.004952862f, -0.007730018f, + -0.04043371f, -0.017094059f, 0.07229206f, -0.023670016f, + -0.052195564f, -0.025616996f, -0.01520939f, 0.045104615f, + -0.007376126f, 0.003533447f, 0.006570588f, 0.056037236f, + 0.12436656f, 0.051817212f, 0.028532185f, -0.08686856f, + 0.11868599f, 0.07663395f, -0.07323171f, 0.03463402f, + -0.050708205f, -0.04458982f, -0.11590894f, 0.021273347f, + 0.1251325f, -0.15313013f, -0.12224372f, 0.17228661f, + 0.023029093f, 0.086124025f, 0.006445803f, -0.03496501f, + 0.028332196f, 0.04449512f, -0.042436164f, -0.026587414f, + -0.006041347f, -0.09292539f, -0.05678812f, 0.03897832f, + 0.09465633f, 0.008115513f, -0.02171956f, 0.08304309f, + 0.071401566f, 0.019622514f, 0.032163795f, -0.004167056f, + 0.02295182f, 0.030739572f, 0.056506045f, 0.004612461f, + 0.06524936f, 0.059999723f, 0.046395954f, -0.0045512207f, + -0.1335546f, -0.030136576f, 0.11584653f, -0.014678886f, + 0.0020118146f, -0.09688814f, -0.0790206f, 0.039770417f, + -0.0329582f, 0.07922767f, 0.029322514f, 0.026405897f, + 0.04207835f, -0.07073373f, 0.063781224f, 0.0859677f, + -0.10925287f, -0.07011058f, 0.048005477f, 0.03438226f, + -0.09606514f, -0.006669445f, -0.043381985f, 0.04240257f, + -0.06955775f, -0.06769346f, 0.043903265f, -0.026784198f, + -0.017840602f, 0.024307009f, -0.040079936f, -0.019946516f, + 0.045318738f, -0.12233574f, 0.026170589f, 0.0074471775f, + 0.15978073f, 0.10185836f, 0.10298046f, -0.015476589f, + -0.039390966f, -0.072174534f, 0.0739445f, -0.1211869f, + -0.0347889f, -0.07943156f, 0.014809798f, -0.12412325f, + -0.0030663363f, 0.039695457f, 0.0647603f, -0.08291318f, + -0.018529687f, -0.004423833f, 0.0037507233f, 0.084633216f, + -0.01514876f, -0.056505352f, -0.012800942f, -0.06994386f, + 0.012962922f, -0.031234352f, 0.07029052f, 0.016418684f, + 0.03618972f, 0.055686004f, -0.08663945f, -0.017404709f, + -0.054761406f, 0.029065743f, 0.052404847f, 0.020238016f, + 0.0048197987f, -0.0214882f, 0.07078733f, 0.013016777f, + 0.06262858f, 0.009184685f, 0.020785125f, -0.043904778f, + -0.0270329f, -0.03299152f, -0.060088247f, -0.015162964f, + -0.001828936f, 0.12642565f, -0.056757294f, 0.013586685f, + 0.09232601f, -0.035886683f, 0.06000002f, 0.05229691f, + -0.052580316f, -0.082029596f, -0.010794592f, 0.012947712f, + -0.036429964f, -0.085508935f, -0.13127148f, -0.017744139f, + 0.031502828f, 0.036232427f, -0.031581745f, 0.023051167f, + -0.05325106f, -0.03421577f, 0.028793324f, -0.034633752f, + -0.009881397f, -0.043551125f, -0.018609839f, 0.0019097115f, + -0.008799762f, 0.056595087f, 0.0022273948f, 0.055752404f + }); + + auto recurrentToOutputWeights = + MakeTensor(tensorInfo20x16, {0.025825322f, -0.05813119f, 0.09495884f,-0.045984812f, -0.01255415f, + -0.0026479573f,-0.08196161f,-0.054914974f,-0.0046604523f, + -0.029587349f, -0.044576716f, -0.07480124f, -0.082868785f, + 0.023254942f, 0.027502948f, -0.0039728214f, -0.08683098f, + -0.08116779f, -0.014675607f, -0.037924774f, -0.023314456f, + -0.007401714f, -0.09255757f, 0.029460307f, -0.08829125f, + -0.005139627f, -0.08989442f, -0.0555066f, 0.13596267f, + -0.025062224f, -0.048351806f, -0.03850004f, 0.07266485f, + -0.022414139f, 0.05940088f, 0.075114764f, 0.09597592f, + -0.010211725f, -0.0049794707f, -0.011523867f, 
-0.025980417f, + 0.072999895f, 0.11091378f, -0.081685916f, 0.014416728f, + 0.043229222f, 0.034178585f, -0.07530371f, 0.035837382f, + -0.085607f, -0.007721233f, -0.03287832f, -0.043848954f, + -0.06404588f, -0.06632928f, -0.073643476f, 0.008214239f, + -0.045984086f, 0.039764922f, 0.03474462f, 0.060612556f, + -0.080590084f, 0.049127717f, 0.04151091f, -0.030063879f, + 0.008801774f, -0.023021035f, -0.019558564f, 0.05158114f, + -0.010947698f, -0.011825728f, 0.0075720972f, 0.0699727f, + -0.0039981045f, 0.069350146f, 0.08799282f, 0.016156472f, + 0.035502106f, 0.11695009f, 0.006217345f, 0.13392477f, + -0.037875112f, 0.025745004f, 0.08940699f, -0.00924166f, + 0.0046702605f, -0.036598757f, -0.08811812f, 0.10522024f, + -0.032441203f, 0.008176899f, -0.04454919f, 0.07058152f, + 0.0067963637f, 0.039206743f, 0.03259838f, 0.03725492f, + -0.09515802f, 0.013326398f, -0.052055415f, -0.025676316f, + 0.03198509f, -0.015951829f, -0.058556724f, 0.036879618f, + 0.043357447f, 0.028362012f, -0.05908629f, 0.0059240665f, + -0.04995891f, -0.019187413f,0.0276265f, -0.01628143f, 0.0025863599f, + 0.08800015f, 0.035250366f, -0.022165963f, -0.07328642f, + -0.009415526f, -0.07455109f, 0.11690406f, 0.0363299f, + 0.07411125f, 0.042103454f, -0.009660886f, 0.019076364f, + 0.018299393f, -0.046004917f, 0.08891175f,0.0431396f, -0.026327137f, + -0.051502608f, 0.08979574f, -0.051670972f, 0.04940282f, + -0.07491107f, -0.021240504f, 0.022596184f, -0.034280192f, + 0.060163025f, -0.058211457f, -0.051837247f, -0.01349775f, + -0.04639988f, -0.035936575f, -0.011681591f, 0.064818054f, + 0.0073146066f, -0.021745546f, -0.043124277f, -0.06471268f, + -0.07053354f, -0.029321948f, -0.05330136f, 0.016933719f, + -0.053782392f, 0.13747959f, -0.1361751f, -0.11569455f, + 0.0033329215f, 0.05693899f, -0.053219706f, 0.063698f, + 0.07977434f, -0.07924483f, 0.06936997f, 0.0034815092f, + -0.007305279f, -0.037325785f, -0.07251102f, -0.033633437f, + -0.08677009f, 0.091591336f, -0.14165086f, 0.021752775f, + 0.019683983f, 0.0011612234f, -0.058154266f, 0.049996935f, + 0.0288841f, -0.0024567875f, -0.14345716f, 0.010955264f,-0.10234828f, + 0.1183656f, -0.0010731248f, -0.023590032f,-0.072285876f,-0.0724771f, + -0.026382286f, -0.0014920527f, 0.042667855f, 0.0018776858f, + 0.02986552f, 0.009814309f, 0.0733756f, 0.12289186f, + 0.018043943f, -0.0458958f, 0.049412545f, 0.033632483f, + 0.05495232f, 0.036686596f, -0.013781798f, -0.010036754f, + 0.02576849f, -0.08307328f, 0.010112348f, 0.042521734f, + -0.05869831f, -0.071689695f, 0.03876447f, -0.13275425f, -0.0352966f, + -0.023077697f, 0.10285965f, 0.084736146f, 0.15568255f, + -0.00040734606f, 0.027835453f, -0.10292561f, -0.032401145f, + 0.10053256f, -0.026142767f, -0.08271222f, -0.0030240538f, + -0.016368777f, 0.1070414f, 0.042672627f, 0.013456989f, + -0.0437609f, -0.022309763f, 0.11576483f, 0.04108048f, + 0.061026827f, -0.0190714f, -0.0869359f, 0.037901703f, 0.0610107f, + 0.07202949f, 0.01675338f, 0.086139716f, -0.08795751f, + -0.014898893f, -0.023771819f, -0.01965048f, 0.007955471f, + -0.043740474f, 0.03346837f, -0.10549954f, 0.090567775f, + 0.042013682f, -0.03176985f, 0.12569028f, -0.02421228f, + -0.029526481f, 0.023851605f, 0.031539805f, 0.05292009f, + -0.02344001f, -0.07811758f, -0.08834428f, 0.10094801f, + 0.16594367f, -0.06861939f, -0.021256343f, -0.041093912f, + -0.06669611f, 0.035498552f, 0.021757556f, -0.09302526f, + -0.015403468f, -0.06614931f, -0.051798206f, -0.013874718f, + 0.03630673f, 0.010412845f, -0.08077351f, 0.046185967f, + 0.0035662893f, 0.03541868f, -0.094149634f, -0.034814864f, + 0.003128424f, 
-0.020674974f, -0.03944324f, -0.008110165f, + -0.11113267f, 0.08484226f, 0.043586485f, 0.040582247f, + 0.0968012f, -0.065249965f, -0.028036479f, 0.0050708856f, + 0.0017462453f, 0.0326779f, 0.041296225f, 0.09164146f, + -0.047743853f, -0.015952192f, -0.034451712f, 0.084197424f, + -0.05347844f, -0.11768019f, 0.085926116f, -0.08251791f, + -0.045081906f, 0.0948852f, 0.068401024f, 0.024856757f, + 0.06978981f, -0.057309967f, -0.012775832f, -0.0032452994f, + 0.01977615f, -0.041040014f, -0.024264973f,0.063464895f, 0.05431621f + }); + + auto cellToInputWeights = + MakeTensor(tensorInfo20, {0.040369894f, 0.030746894f, 0.24704495f, 0.018586371f, -0.037586458f, + -0.15312155f, -0.11812848f, -0.11465643f, 0.20259799f, 0.11418174f, + -0.10116027f, -0.011334949f, 0.12411352f, -0.076769054f,-0.052169047f, + 0.21198851f, -0.38871562f, -0.09061183f, -0.09683246f, -0.21929175f + }); + + + auto cellToForgetWeights = + MakeTensor(tensorInfo20, {-0.01998659f,-0.15568835f,-0.24248174f, -0.012770197f, 0.041331276f, + -0.072311886f, -0.052123554f,-0.0066330447f,-0.043891653f,0.036225766f, + -0.047248036f, 0.021479502f,0.033189066f, 0.11952997f, -0.020432774f, + 0.64658105f, -0.06650122f, -0.03467612f, 0.095340036f, 0.23647355f + }); + + auto cellToOutputWeights = + MakeTensor(tensorInfo20, {0.08286371f, -0.08261836f, -0.51210177f, 0.002913762f, 0.17764764f, + -0.5495371f, -0.08460716f, -0.24552552f, 0.030037103f, 0.04123544f, + -0.11940523f, 0.007358328f, 0.1890978f, 0.4833202f, -0.34441817f, + 0.36312827f, -0.26375428f, 0.1457655f, -0.19724406f, 0.15548733f + }); + + auto projectionWeights = + MakeTensor(tensorInfo16x20, + {-0.009802181f, 0.09401916f, 0.0717386f, -0.13895074f, 0.09641832f, + 0.060420845f, 0.08539281f, 0.054285463f, 0.061395317f, 0.034448683f, + -0.042991187f, 0.019801661f, -0.16840284f, -0.015726732f, -0.23041931f, + -0.024478018f, -0.10959692f, -0.013875541f, 0.18600968f, -0.061274476f, + 0.0138165f, -0.08160894f, -0.07661644f, 0.032372914f, 0.16169067f, + 0.22465782f, -0.03993472f, -0.004017731f, 0.08633481f, -0.28869787f, + 0.08682067f, 0.17240396f, 0.014975425f, 0.056431185f, 0.031037588f, + 0.16702051f, 0.0077946745f, 0.15140012f, 0.29405436f, 0.120285f, + -0.188994f, -0.027265169f, 0.043389652f, -0.022061434f, 0.014777949f, + -0.20203483f, 0.094781205f, 0.19100232f, 0.13987629f, -0.036132768f, + -0.06426278f, -0.05108664f, 0.13221376f, 0.009441198f, -0.16715929f, + 0.15859416f, -0.040437475f, 0.050779544f, -0.022187516f, 0.012166504f, + 0.027685808f, -0.07675938f, -0.0055694645f, -0.09444123f, 0.0046453946f, + 0.050794356f, 0.10770313f, -0.20790008f, -0.07149004f, -0.11425117f, + 0.008225835f, -0.035802525f, 0.14374903f, 0.15262283f, 0.048710253f, + 0.1847461f, -0.007487823f, 0.11000021f, -0.09542012f, 0.22619456f, + -0.029149994f, 0.08527916f, 0.009043713f, 0.0042746216f, 0.016261552f, + 0.022461696f, 0.12689082f, -0.043589946f, -0.12035478f, -0.08361797f, + -0.050666027f, -0.1248618f, -0.1275799f, -0.071875185f, 0.07377272f, + 0.09944291f, -0.18897448f, -0.1593054f, -0.06526116f, -0.040107165f, + -0.004618631f, -0.067624845f, -0.007576253f, 0.10727444f, 0.041546922f, + -0.20424393f, 0.06907816f, 0.050412357f, 0.00724631f, 0.039827548f, + 0.12449835f, 0.10747581f, 0.13708383f, 0.09134148f, -0.12617786f, + -0.06428341f, 0.09956831f, 0.1208086f, -0.14676677f, -0.0727722f, + 0.1126304f, 0.010139365f, 0.015571211f, -0.038128063f, 0.022913318f, + -0.042050496f, 0.16842307f, -0.060597885f, 0.10531834f, -0.06411776f, + -0.07451711f, -0.03410368f, -0.13393489f, 0.06534304f, 0.003620307f, + 
0.04490757f, 0.05970546f, 0.05197996f, 0.02839995f, 0.10434969f, + -0.013699693f, -0.028353551f, -0.07260381f, 0.047201227f, -0.024575593f, + -0.036445823f, 0.07155557f, 0.009672501f, -0.02328883f, 0.009533515f, + -0.03606021f, -0.07421458f, -0.028082801f, -0.2678904f, -0.13221288f, + 0.18419984f, -0.13012612f, -0.014588381f, -0.035059117f, -0.04824723f, + 0.07830115f, -0.056184657f, 0.03277091f, 0.025466874f, 0.14494097f, + -0.12522776f, -0.098633975f, -0.10766018f, -0.08317623f, 0.08594209f, + 0.07749552f, 0.039474737f, 0.1776665f, -0.07409566f, -0.0477268f, + 0.29323658f, 0.10801441f, 0.1154011f, 0.013952499f, 0.10739139f, + 0.10708251f, -0.051456142f, 0.0074137426f, -0.10430189f, 0.10034707f, + 0.045594677f, 0.0635285f, -0.0715442f, -0.089667566f, -0.10811871f, + 0.00026344223f, 0.08298446f, -0.009525053f, 0.006585689f, -0.24567553f, + -0.09450807f, 0.09648481f, 0.026996298f, -0.06419476f, -0.04752702f, + -0.11063944f, -0.23441927f, -0.17608605f, -0.052156363f, 0.067035615f, + 0.19271925f, -0.0032889997f, -0.043264326f, 0.09663576f, -0.057112187f, + -0.10100678f, 0.0628376f, 0.04447668f, 0.017961001f, -0.10094388f, + -0.10190601f, 0.18335468f, 0.10494553f, -0.052095775f, -0.0026118709f, + 0.10539724f, -0.04383912f, -0.042349473f, 0.08438151f, -0.1947263f, + 0.02251204f, 0.11216432f, -0.10307853f, 0.17351969f, -0.039091777f, + 0.08066188f, -0.00561982f, 0.12633002f, 0.11335965f, -0.0088127935f, + -0.019777594f, 0.06864014f, -0.059751723f, 0.016233567f, -0.06894641f, + -0.28651384f, -0.004228674f, 0.019708522f, -0.16305895f, -0.07468996f, + -0.0855457f, 0.099339016f, -0.07580735f, -0.13775392f, 0.08434318f, + 0.08330512f, -0.12131499f, 0.031935584f, 0.09180414f, -0.08876437f, + -0.08049874f, 0.008753825f, 0.03498998f, 0.030215185f, 0.03907079f, + 0.089751154f, 0.029194152f, -0.03337423f, -0.019092513f, 0.04331237f, + 0.04299654f, -0.036394123f, -0.12915532f, 0.09793732f, 0.07512415f, + -0.11319543f, -0.032502122f, 0.15661901f, 0.07671967f, -0.005491124f, + -0.19379048f, -0.218606f, 0.21448623f, 0.017840758f, 0.1416943f, + -0.07051762f, 0.19488361f, 0.02664691f, -0.18104725f, -0.09334311f, + 0.15026465f, -0.15493552f, -0.057762887f, -0.11604192f, -0.262013f, + -0.01391798f, 0.012185008f, 0.11156489f, -0.07483202f, 0.06693364f, + -0.26151478f, 0.046425626f, 0.036540434f, -0.16435726f, 0.17338543f, + -0.21401681f, -0.11385144f, -0.08283257f, -0.069031075f, 0.030635102f, + 0.010969227f, 0.11109743f, 0.010919218f, 0.027526086f, 0.13519906f, + 0.01891392f, -0.046839405f, -0.040167913f, 0.017953383f, -0.09700955f, + 0.0061885654f, -0.07000971f, 0.026893595f, -0.038844477f, 0.14543656f + }); + + std::vector projectionBiasVector(outputSize, 0.f); + auto projectionBias = MakeTensor(tensorInfo16, projectionBiasVector); + + armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo20x5); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo20x5); + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo20x5); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo20x5); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo20x16); + armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo20x16); + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo20x16); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo20x16); + armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle 
forgetGateBiasTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo20); + armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo16x20); + armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo16); + + AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]); + AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); + AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); + AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]); + AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]); + AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&projectionBiasTensor, &projectionBias[0]); + + data.m_InputToInputWeights = &inputToInputWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + data.m_CellToInputWeights = &cellToInputWeightsTensor; + data.m_InputGateBias = &inputGateBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_CellBias = &cellBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + data.m_CellToForgetWeights = &cellToForgetWeightsTensor; + data.m_CellToOutputWeights = &cellToOutputWeightsTensor; + data.m_ProjectionWeights = &projectionWeightsTensor; + data.m_ProjectionBias = &projectionBiasTensor; + + // Flags to set test configuration + data.m_Parameters.m_ActivationFunc = 4; + data.m_Parameters.m_CifgEnabled = false; + data.m_Parameters.m_PeepholeEnabled = true; + data.m_Parameters.m_ProjectionEnabled = true; + + + std::unique_ptr workload = workloadFactory.CreateLstm(data, info); + inputHandle->Allocate(); + outputStateInHandle->Allocate(); + cellStateInHandle->Allocate(); + + scratchHandle->Allocate(); + outputStateOutHandle->Allocate(); + cellStateOutHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); + 
CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); + CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + return ret; + +} + + +LayerTestResult LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, + const boost::multi_array& input, + const boost::multi_array& outputExpected) +{ + bool cifgEnabled = true; + bool peepholeEnabled = true; + bool projectionEnabled = false; + // These are not the input and the output of Lstm yet + unsigned int batchSize = boost::numeric_cast(input.shape()[0]); + unsigned int inputSize = boost::numeric_cast(input.shape()[1]); + + unsigned int outputSize = boost::numeric_cast(outputExpected.shape()[1]); + + const unsigned int cellSize = outputSize; + + // Decide the shape of all input tensors + armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, armnn::GetDataType()); + armnn::TensorInfo outputStateInTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo cellStateInTensorInfo({batchSize, cellSize}, armnn::GetDataType()); + + unsigned int scratchBufferSize = cifgEnabled ? cellSize * 4 : cellSize * 3; + armnn::TensorInfo scratchBufferTensorInfo({batchSize, scratchBufferSize}, armnn::GetDataType()); + armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo cellStateOutTensorInfo({batchSize, cellSize}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, armnn::GetDataType()); + + // List of inputs + std::vector inputData; + inputData.assign(input.data(), input.data() + batchSize*inputSize); + auto inputTensor = MakeTensor(inputTensorInfo, inputData); + + std::vector outputStateInVector(batchSize * outputSize, 0.f); + auto outputStateInTensor = MakeTensor(outputStateInTensorInfo, outputStateInVector); + + std::vector cellStateInVector(batchSize * cellSize, 0.f); + auto cellStateInTensor = MakeTensor(cellStateInTensorInfo, cellStateInVector); + + + // Prepare all the weights in the descriptor for LSTM + armnn::LstmQueueDescriptor data; + armnn::TensorInfo tensorInfoInput({cellSize, inputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfoOutput({cellSize, outputSize}, armnn::GetDataType()); + armnn::TensorInfo tensorInfoNumUnits({cellSize}, armnn::GetDataType()); + + auto inputToCellWeights = MakeTensor(tensorInfoInput, + {-0.49770179f, -0.27711356f, -0.09624726f, 0.05100781f, + 0.04717243f, 0.48944736f, -0.38535351f, + -0.17212132f}); + auto inputToForgetWeights = MakeTensor(tensorInfoInput, + {-0.55291498f, -0.42866567f, 0.13056988f, + -0.3633365f, -0.22755712f, 0.28253698f, 0.24407166f, + 0.33826375f}); + auto inputToOutputWeights = MakeTensor(tensorInfoInput, + {0.10725588f, -0.02335852f, -0.55932593f, + -0.09426838f, -0.44257352f, 0.54939759f, + 0.01533556f, 0.42751634f}); + auto cellBias = MakeTensor(tensorInfoNumUnits, {0.f, 0.f, 0.f, 0.f}); + auto forgetGateBias = MakeTensor(tensorInfoNumUnits, {1.f, 1.f, 1.f, 1.f}); + auto outputGateBias = MakeTensor(tensorInfoNumUnits, {0.f, 0.f, 0.f, 0.f}); + + auto recurrentToCellWeights = MakeTensor(tensorInfoOutput, + {0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f, 0.42957711f, + 0.01841056f, -0.32764608f, -0.33027974f, -0.10826075f, 0.20675004f, + 0.19069612f, -0.03026325f, -0.54532051f, 0.33003211f, 0.44901288f, + 0.21193194f}); + auto recurrentToForgetWeights = 
MakeTensor(tensorInfoOutput, + {-0.13832897f, -0.0515101f, -0.2359007f, -0.16661474f, -0.14340827f, + 0.36986142f, 0.23414481f, 0.55899f, 0.10798943f, -0.41174671f, 0.17751795f, + -0.34484994f, -0.35874045f, -0.11352962f, 0.27268326f, 0.54058349f}); + + auto recurrentToOutputWeights = MakeTensor(tensorInfoOutput, + {0.41613156f, 0.42610586f, -0.16495961f, -0.5663873f, 0.30579174f, -0.05115908f, + -0.33941799f, 0.23364776f, 0.11178309f, 0.09481031f, -0.26424935f, 0.46261835f, + 0.50248802f, 0.26114327f, -0.43736315f, 0.33149987f}); + + auto cellToForgetWeights = MakeTensor(tensorInfoNumUnits, + {0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f}); + auto cellToOutputWeights = MakeTensor(tensorInfoNumUnits, + {-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f}); + + armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfoInput); + armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfoInput); + armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfoInput); + + armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfoNumUnits); + armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfoNumUnits); + armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfoNumUnits); + + armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfoOutput); + armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfoOutput); + armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfoOutput); + + + armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfoNumUnits); + armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfoNumUnits); + + AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]); + + AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]); + AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]); + AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]); + + AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]); + AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]); + + AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]); + AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]); + + + data.m_InputToCellWeights = &inputToCellWeightsTensor; + data.m_InputToForgetWeights = &inputToForgetWeightsTensor; + data.m_InputToOutputWeights = &inputToOutputWeightsTensor; + + data.m_CellBias = &cellBiasTensor; + data.m_ForgetGateBias = &forgetGateBiasTensor; + data.m_OutputGateBias = &outputGateBiasTensor; + + data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor; + data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor; + data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor; + + data.m_CellToForgetWeights = &cellToForgetWeightsTensor; + data.m_CellToOutputWeights = &cellToOutputWeightsTensor; + + // other parameters for the descriptor + data.m_Parameters.m_CifgEnabled = cifgEnabled; + data.m_Parameters.m_ProjectionEnabled = projectionEnabled; + data.m_Parameters.m_PeepholeEnabled = peepholeEnabled; + + data.m_Parameters.m_ActivationFunc = 4; + 
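+    // (An activation-function value of 4 appears to select TanH here, following the NNAPI-style LSTM activation enumeration.)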
data.m_Parameters.m_ClippingThresProj = 0.0; + data.m_Parameters.m_ClippingThresCell = 0.0; + + + // List of outputs + std::vector scratchBufferVector(batchSize * scratchBufferSize, 0.f); + auto scratchBufferTensor = MakeTensor(scratchBufferTensorInfo, scratchBufferVector); + LayerTestResult ret0(scratchBufferTensorInfo); + + // Output state for a certain time step + std::vector outputStateOutVector(batchSize * outputSize, 0.f); + auto outputStateOutTensor = MakeTensor(outputStateOutTensorInfo, outputStateOutVector); + LayerTestResult ret1(outputStateOutTensorInfo); + + // Cell state for a certain time step + std::vector cellStateOutVector(batchSize * cellSize, 0.f); + auto cellStateOutTensor = MakeTensor(cellStateOutTensorInfo, cellStateOutVector); + LayerTestResult ret2(cellStateOutTensorInfo); + + // Output for a certain time step + std::vector outputVector(batchSize * outputSize, 0.f); + auto outputTensor = MakeTensor(outputTensorInfo, outputVector); + std::vector outputData; + outputData.assign(outputExpected.data(), outputExpected.data() + batchSize*outputSize); + LayerTestResult ret3(outputTensorInfo); + ret3.outputExpected = MakeTensor(outputTensorInfo, outputData); + + // Prepare the inputs and outputs for the workload + std::unique_ptr inputHandle = + workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputStateInHandle = + workloadFactory.CreateTensorHandle(outputStateInTensorInfo); + std::unique_ptr cellStateInHandle = + workloadFactory.CreateTensorHandle(cellStateInTensorInfo); + + std::unique_ptr scratchBufferHandle = + workloadFactory.CreateTensorHandle(scratchBufferTensorInfo); + std::unique_ptr outputStateOutHandle = + workloadFactory.CreateTensorHandle(outputStateOutTensorInfo); + std::unique_ptr cellStateOutHandle = + workloadFactory.CreateTensorHandle(cellStateOutTensorInfo); + std::unique_ptr outputHandle = + workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); + AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); + + AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchBufferHandle.get()); + AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); + AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateLstm(data, info); + + + inputHandle->Allocate(); + outputStateInHandle->Allocate(); + cellStateInHandle->Allocate(); + + scratchBufferHandle->Allocate(); + outputStateOutHandle->Allocate(); + cellStateOutHandle->Allocate(); + outputHandle->Allocate(); + + + CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]); + CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]); + CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]); + + CopyDataToITensorHandle(scratchBufferHandle.get(), &scratchBufferTensor[0][0]); + CopyDataToITensorHandle(outputStateOutHandle.get(), &outputStateOutTensor[0][0]); + CopyDataToITensorHandle(cellStateOutHandle.get(), &cellStateOutTensor[0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret0.output[0][0], scratchBufferHandle.get()); + CopyDataFromITensorHandle(&ret1.output[0][0], outputStateOutHandle.get()); + 
CopyDataFromITensorHandle(&ret2.output[0][0], cellStateOutHandle.get()); + CopyDataFromITensorHandle(&ret3.output[0][0], outputHandle.get()); + + return ret3; +} diff --git a/src/backends/test/MemCopyTests.cpp b/src/backends/test/MemCopyTests.cpp new file mode 100644 index 0000000000..44089c9d65 --- /dev/null +++ b/src/backends/test/MemCopyTests.cpp @@ -0,0 +1,180 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include +#include + +#include "armnn/ArmNN.hpp" +#include "backends/RefWorkloadFactory.hpp" +#if ARMCOMPUTECL_ENABLED +#include "backends/ClWorkloadFactory.hpp" +#endif +#if ARMCOMPUTENEON_ENABLED +#include "backends/NeonWorkloadFactory.hpp" +#endif +#include "backends/CpuTensorHandle.hpp" +#include "test/TensorHelpers.hpp" + +#include "TensorCopyUtils.hpp" +#include "WorkloadTestUtils.hpp" + +#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED +#include "../ArmComputeTensorUtils.hpp" +#endif + +BOOST_AUTO_TEST_SUITE(MemCopyTestSuite) + +void MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, armnn::IWorkloadFactory& dstWorkloadFactory, + bool withSubtensors) +{ + const std::array shapeData = { { 1u, 1u, 6u, 5u } }; + const armnn::TensorShape tensorShape(4, shapeData.data()); + const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); + boost::multi_array inputData = MakeTensor(tensorInfo, std::vector( + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + }) + ); + + boost::multi_array outputData(shapeData); + + auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo); + auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo); + + AllocateAndCopyDataToITensorHandle(inputTensorHandle.get(), inputData.data()); + outputTensorHandle->Allocate(); + + armnn::MemCopyQueueDescriptor memCopyQueueDesc; + armnn::WorkloadInfo workloadInfo; + + const unsigned int origin[4] = {}; + + auto workloadInput = (withSubtensors && srcWorkloadFactory.SupportsSubTensors()) + ? srcWorkloadFactory.CreateSubTensorHandle(*inputTensorHandle, tensorShape, origin) + : std::move(inputTensorHandle); + auto workloadOutput = (withSubtensors && dstWorkloadFactory.SupportsSubTensors()) + ? 
dstWorkloadFactory.CreateSubTensorHandle(*outputTensorHandle, tensorShape, origin) + : std::move(outputTensorHandle); + + AddInputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadInput.get()); + AddOutputToWorkload(memCopyQueueDesc, workloadInfo, tensorInfo, workloadOutput.get()); + + dstWorkloadFactory.CreateMemCopy(memCopyQueueDesc, workloadInfo)->Execute(); + + CopyDataFromITensorHandle(outputData.data(), workloadOutput.get()); + + BOOST_TEST(CompareTensors(inputData, outputData)); +} + +template +void MemCopyTest(bool withSubtensors) +{ + SrcWorkloadFactory srcWorkloadFactory; + DstWorkloadFactory dstWorkloadFactory; + MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); +} + +#if ARMCOMPUTECL_ENABLED || ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(AclTypeConversions) +{ + arm_compute::Strides strides(1,2,3,4); + armnn::TensorShape convertedStrides = armnn::armcomputetensorutils::GetStrides(strides); + BOOST_TEST(convertedStrides[0] == 4); + BOOST_TEST(convertedStrides[1] == 3); + BOOST_TEST(convertedStrides[2] == 2); + BOOST_TEST(convertedStrides[3] == 1); + + arm_compute::TensorShape shape(5,6,7,8); + armnn::TensorShape convertedshape = armnn::armcomputetensorutils::GetShape(shape); + BOOST_TEST(convertedshape[0] == 8); + BOOST_TEST(convertedshape[1] == 7); + BOOST_TEST(convertedshape[2] == 6); + BOOST_TEST(convertedshape[3] == 5); +} +#endif + +#if ARMCOMPUTECL_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) +{ + MemCopyTest(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) +{ + MemCopyTest(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) +{ + MemCopyTest(true); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) +{ + MemCopyTest(true); +} + +#endif // ARMCOMPUTECL_ENABLED + +#if ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon) +{ + MemCopyTest(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu) +{ + MemCopyTest(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors) +{ + MemCopyTest(true); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors) +{ + MemCopyTest(true); +} + +#endif // ARMCOMPUTENEON_ENABLED + +#if ARMCOMPUTECL_ENABLED && ARMCOMPUTENEON_ENABLED + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu) +{ + MemCopyTest(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon) +{ + MemCopyTest(false); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors) +{ + MemCopyTest(true); +} + +BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors) +{ + MemCopyTest(true); +} + +#endif + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/NormTestImpl.hpp b/src/backends/test/NormTestImpl.hpp new file mode 100644 index 0000000000..2690313655 --- /dev/null +++ b/src/backends/test/NormTestImpl.hpp @@ -0,0 +1,241 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "armnn/Exceptions.hpp" +#include "armnn/LayerSupport.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +LayerTestResult SimpleNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod) +{ + const unsigned int inputHeight = 2; + const unsigned int inputWidth = 2; + const unsigned int inputChannels = 1; + const unsigned int inputNum = 2; + + unsigned int outputHeight = inputHeight; + unsigned int outputWidth = inputWidth; + unsigned int outputChannels = inputChannels; + unsigned int outputNum = inputNum; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + + auto inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + auto outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + LayerTestResult ret(outputTensorInfo); + + auto input = MakeTensor(inputTensorInfo, std::vector({ + // Batch #0 + 1.0f, 2.0f, + 3.0f, 4.0f, + // Batch #1 + 5.0f, 6.0f, + 7.0f, 8.0f + })); + + float alpha = 1.f; + float beta = 1.f; + float kappa = 1.f; + uint32_t normSize = 3; + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::NormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_NormChannelType = normChannel; + data.m_Parameters.m_NormMethodType = normMethod; + data.m_Parameters.m_NormSize = normSize; + data.m_Parameters.m_Alpha = alpha; + data.m_Parameters.m_Beta = beta; + data.m_Parameters.m_K = kappa; + + armnn::PassthroughCpuTensorHandle refHandle(outputTensorInfo, &ret.outputExpected[0][0][0][0]); + armnn::NormalizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, &refHandle); + + std::unique_ptr workload = workloadFactory.CreateNormalization(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + switch (normMethod) + { + case armnn::NormalizationAlgorithmMethod::LocalBrightness: + { + switch (normChannel) + { + case armnn::NormalizationAlgorithmChannel::Within: + { + // When normalising within channels, the 3x3 kernel covers the entire 2x2 input at every index. + // Therefore, all output values should equal the inputs, but divided by: + // pow((kappa + (accumulatedScale * alpha)), beta) + // ...where accumulatedScale is the sum of every element squared. 
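+            // Worked check with the data above (alpha = beta = kappa = 1):
+            // batch #0: accumulatedScale = 1 + 4 + 9 + 16 = 30, so divisor = (1 + 30*1)^1 = 31;
+            // batch #1: accumulatedScale = 25 + 36 + 49 + 64 = 174, so divisor = 175.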
+ float divisor[inputNum]; + for(int i = 0; i < boost::numeric_cast(inputNum); i++) + { + float accumulatedScale = input[i][0][0][0]*input[i][0][0][0] + + input[i][0][0][1]*input[i][0][0][1] + + input[i][0][1][0]*input[i][0][1][0] + + input[i][0][1][1]*input[i][0][1][1]; + divisor[i] = powf((kappa + accumulatedScale * alpha), beta); + } + ret.outputExpected = MakeTensor(outputTensorInfo, + std::vector({input[0][0][0][0]/divisor[0], + input[0][0][0][1]/divisor[0], + input[0][0][1][0]/divisor[0], + input[0][0][1][1]/divisor[0], + input[1][0][0][0]/divisor[1], + input[1][0][0][1]/divisor[1], + input[1][0][1][0]/divisor[1], + input[1][0][1][1]/divisor[1]})); + break; + } + case armnn::NormalizationAlgorithmChannel::Across: + { + // When normalising across channels, all output values should equal the inputs, but multiplied by: + // pow((kappa + (accumulatedScale * alpha)), -beta) + // ...where accumulatedScale is the sum of the inputs for adjacent channels for this element squared + // ...where adjacent channels means within half the normSize for the channel + // The test data has only one channel, so this is simplified below. + std::vector outputVector; + for (int n = 0; n < boost::numeric_cast(inputNum); ++n) + { + for (int h = 0; h < boost::numeric_cast(inputHeight); ++h) + { + for (int w = 0; w < boost::numeric_cast(inputWidth); ++w) + { + float accumulatedScale = input[n][0][h][w]*input[n][0][h][w]; + float scale = powf((kappa + accumulatedScale * alpha), -beta); + outputVector.push_back(input[n][0][h][w] * scale); + } + } + } + ret.outputExpected = MakeTensor(outputTensorInfo, outputVector); + break; + } + default: + { + throw armnn::UnimplementedException("Unsupported normalisation channel type, " + "only Across and Within are supported"); + } + } + break; + } + case armnn::NormalizationAlgorithmMethod::LocalContrast: // NOTE: intentional fallthrough. 
+ default: + { + throw armnn::UnimplementedException("Unsupported normalisation method type, " + "only LocalBrightness is supported"); + } + } + + return ret; +} + +LayerTestResult CompareNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::NormalizationAlgorithmChannel normChannel, + armnn::NormalizationAlgorithmMethod normMethod) +{ + constexpr unsigned int inputNum = 5; + constexpr unsigned int inputChannels = 3; + constexpr unsigned int inputHeight = 32; + constexpr unsigned int inputWidth = 24; + + constexpr unsigned int outputNum = inputNum; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputHeight = inputHeight; + constexpr unsigned int outputWidth = inputWidth; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + LayerTestResult ret(outputTensorInfo); + + auto input = MakeRandomTensor(inputTensorInfo, 111234); + + constexpr float alpha = 1.f; + constexpr float beta = 1.f; + constexpr float kappa = 1.f; + constexpr uint32_t normSize = 5; + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::NormalizationQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_NormChannelType = normChannel; + data.m_Parameters.m_NormMethodType = normMethod; + data.m_Parameters.m_NormSize = normSize; + data.m_Parameters.m_Alpha = alpha; + data.m_Parameters.m_Beta = beta; + data.m_Parameters.m_K = kappa; + + std::unique_ptr outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + armnn::NormalizationQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + // Don't execute if Normalization is not supported for the method and channel types, as an exception will be raised. 
+ armnn::Compute compute = workloadFactory.GetCompute(); + const size_t reasonIfUnsupportedMaxLen = 255; + char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; + ret.supported = armnn::IsNormalizationSupported(compute, inputTensorInfo, outputTensorInfo, data.m_Parameters, + reasonIfUnsupported, reasonIfUnsupportedMaxLen); + if (!ret.supported) + { + return ret; + } + + std::unique_ptr workload = workloadFactory.CreateNormalization(data, info); + std::unique_ptr workloadRef = refWorkloadFactory.CreateNormalization(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + refWorkloadFactory.Finalize(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get()); + + return ret; +} + diff --git a/src/backends/test/PermuteTestImpl.hpp b/src/backends/test/PermuteTestImpl.hpp new file mode 100644 index 0000000000..b49c539b2e --- /dev/null +++ b/src/backends/test/PermuteTestImpl.hpp @@ -0,0 +1,225 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +template +LayerTestResult SimplePermuteTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::PermuteDescriptor descriptor, + armnn::TensorInfo inputTensorInfo, + armnn::TensorInfo outputTensorInfo, + const std::vector& inputData, + const std::vector& outputExpectedData) +{ + auto input = MakeTensor(inputTensorInfo, inputData); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, outputExpectedData); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::PermuteQueueDescriptor data; + data.m_Parameters = descriptor; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreatePermute(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult SimplePermuteFloat32TestCommon(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2, 2, 2 }; + unsigned int outputShape[] = { 1, 2, 2, 2 }; + + armnn::PermuteDescriptor descriptor; + descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + std::vector input = std::vector( + { + 1.0f, 2.0f, + 3.0f, 4.0f, + + 5.0f, 6.0f, + 7.0f, 8.0f + }); + + std::vector outputExpected = std::vector( + { + 1.0f, 5.0f, 2.0f, 6.0f, + 3.0f, 7.0f, 4.0f, 8.0f + }); + + return 
SimplePermuteTestImpl(workloadFactory, descriptor, inputTensorInfo, + outputTensorInfo, input, outputExpected); +} + +LayerTestResult SimplePermuteUint8TestCommon(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2, 2, 2 }; + unsigned int outputShape[] = { 1, 2, 2, 2 }; + + armnn::PermuteDescriptor descriptor; + descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.0f); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.0f); + + std::vector input = std::vector( + { + 1, 2, + 3, 4, + + 5, 6, + 7, 8 + }); + + std::vector outputExpected = std::vector( + { + 1, 5, 2, 6, + 3, 7, 4, 8 + }); + + return SimplePermuteTestImpl(workloadFactory, descriptor, inputTensorInfo, + outputTensorInfo, input, outputExpected); +} + +LayerTestResult +PermuteFloat32ValueSet1TestCommon(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2, 2, 3 }; + unsigned int outputShape[] = { 1, 3, 2, 2 }; + + armnn::PermuteDescriptor descriptor; + descriptor.m_DimMappings = {0U, 2U, 3U, 1U}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + std::vector input = std::vector( + { + 1.0f, 2.0f, 3.0f, + 11.0f, 12.0f, 13.0f, + 21.0f, 22.0f, 23.0f, + 31.0f, 32.0f, 33.0f, + }); + + std::vector outputExpected = std::vector( + { + 1.0f, 11.0f, 21.0f, 31.0f, + 2.0f, 12.0f, 22.0f, 32.0f, + 3.0f, 13.0f, 23.0f, 33.0f, + }); + + return SimplePermuteTestImpl(workloadFactory, descriptor, inputTensorInfo, + outputTensorInfo, input, outputExpected); +} + +LayerTestResult +PermuteFloat32ValueSet2TestCommon(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 3, 2, 2 }; + unsigned int outputShape[] = { 1, 2, 2, 3 }; + + armnn::PermuteDescriptor descriptor; + descriptor.m_DimMappings = {0U, 3U, 1U, 2U}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + std::vector input = std::vector( + { + 1.0f, 11.0f, 21.0f, 31.0f, + 2.0f, 12.0f, 22.0f, 32.0f, + 3.0f, 13.0f, 23.0f, 33.0f, + }); + + std::vector outputExpected = std::vector( + { + 1.0f, 2.0f, 3.0f, + 11.0f, 12.0f, 13.0f, + 21.0f, 22.0f, 23.0f, + 31.0f, 32.0f, 33.0f, + }); + + return SimplePermuteTestImpl(workloadFactory, descriptor, inputTensorInfo, + outputTensorInfo, input, outputExpected); +} + +LayerTestResult +PermuteFloat32ValueSet3TestCommon(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2, 3, 3 }; + unsigned int outputShape[] = { 1, 3, 2, 3 }; + + armnn::PermuteDescriptor descriptor; + descriptor.m_DimMappings = {0U, 2U, 3U, 1U}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + std::vector input = std::vector( + { + 1.0f, 2.0f, 3.0f, + 11.0f, 12.0f, 13.0f, + 21.0f, 22.0f, 23.0f, + 31.0f, 32.0f, 33.0f, + 41.0f, 42.0f, 43.0f, 
+ 51.0f, 52.0f, 53.0f, + }); + + std::vector outputExpected = std::vector( + { + 1.0f, 11.0f, 21.0f, 31.0f, 41.0f, 51.0f, + 2.0f, 12.0f, 22.0f, 32.0f, 42.0f, 52.0f, + 3.0f, 13.0f, 23.0f, 33.0f, 43.0f, 53.0f, + }); + + return SimplePermuteTestImpl(workloadFactory, descriptor, inputTensorInfo, + outputTensorInfo, input, outputExpected); +} diff --git a/src/backends/test/Pooling2dTestImpl.hpp b/src/backends/test/Pooling2dTestImpl.hpp new file mode 100644 index 0000000000..e8c7e86e9d --- /dev/null +++ b/src/backends/test/Pooling2dTestImpl.hpp @@ -0,0 +1,1116 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include + +template +LayerTestResult SimplePooling2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::Pooling2dDescriptor descriptor, + float qScale, + int32_t qOffset, + const boost::multi_array& input, + const boost::multi_array& outputExpected) +{ + unsigned int inputHeight = boost::numeric_cast(input.shape()[2]); + unsigned int inputWidth = boost::numeric_cast(input.shape()[3]); + unsigned int inputChannels = boost::numeric_cast(input.shape()[1]); + unsigned int inputBatchSize = boost::numeric_cast(input.shape()[0]); + + unsigned int outputHeight = boost::numeric_cast(outputExpected.shape()[2]); + unsigned int outputWidth = boost::numeric_cast(outputExpected.shape()[3]); + unsigned int outputChannels = boost::numeric_cast(outputExpected.shape()[1]); + unsigned int outputBatchSize = boost::numeric_cast(outputExpected.shape()[0]); + + armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth }, + armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth }, + armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + LayerTestResult result(outputTensorInfo); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Pooling2dQueueDescriptor queueDescriptor; + queueDescriptor.m_Parameters = descriptor; + armnn::WorkloadInfo workloadInfo; + AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get()); + + // Don't execute if Pooling is not supported, as an exception will be raised. 
+ armnn::Compute compute = workloadFactory.GetCompute(); + const size_t reasonIfUnsupportedMaxLen = 255; + char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; + result.supported = armnn::IsPooling2dSupported(compute, inputTensorInfo, outputTensorInfo, + queueDescriptor.m_Parameters, + reasonIfUnsupported, reasonIfUnsupportedMaxLen); + if (!result.supported) + { + return result; + } + + std::unique_ptr workload = workloadFactory.CreatePooling2d(queueDescriptor, workloadInfo); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get()); + + result.outputExpected = outputExpected; + + return result; +} + +// +// Tests max pooling with the following parameters: +// +// Pooling size: 3x3 +// Stride: (2,4) +// input size: 8x13 +// channels: 2 +// batch size: 2 +// +template +LayerTestResult SimpleMaxPooling2dSize3x3Stride2x4TestCommon(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 4; + // forceNoPadding is mainly used for compatibility with ARM Compute. + // As of 16/05/2017, it errors if padX or padY are equal to or greater than the pool size. + descriptor.m_PadLeft = descriptor.m_PadRight = forceNoPadding ? 0 : 3; + descriptor.m_PadTop = descriptor.m_PadBottom = 0; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + unsigned int inputWidth = 8; + unsigned int inputHeight = 13; + unsigned int outputWidth = + (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) / + descriptor.m_StrideX; + unsigned int outputHeight = + (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) / + descriptor.m_StrideY; + unsigned int channels = 2; + unsigned int batchSize = 2; + + armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + std::vector singleChannelData({ + 0.0f, 4.0f, 8.0f, 1.0f, 6.0f, 4.0f, 5.0f, 8.0f, + 1.0f, 1.0f, 6.0f, 0.0f, 3.0f, 7.0f, 4.0f, 7.0f, + 8.0f, 5.0f, 0.0f, 0.0f, 8.0f, 3.0f, 4.0f, 3.0f, + 8.0f, 2.0f, 5.0f, 4.0f, 1.0f, 9.0f, 2.0f, 0.0f, + 5.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 7.0f, 2.0f, + 1.0f, 2.0f, 6.0f, 2.0f, 7.0f, 9.0f, 5.0f, 2.0f, + 9.0f, 7.0f, 3.0f, 1.0f, 3.0f, 4.0f, 8.0f, 3.0f, + 1.0f, 0.0f, 0.0f, 5.0f, 5.0f, 4.0f, 2.0f, 0.0f, + 6.0f, 4.0f, 3.0f, 6.0f, 9.0f, 5.0f, 5.0f, 6.0f, + 8.0f, 7.0f, 9.0f, 6.0f, 1.0f, 4.0f, 1.0f, 9.0f, + 7.0f, 1.0f, 9.0f, 2.0f, 9.0f, 9.0f, 8.0f, 1.0f, + 4.0f, 4.0f, 5.0f, 9.0f, 2.0f, 6.0f, 6.0f, 4.0f, + 3.0f, 5.0f, 4.0f, 0.0f, 1.0f, 5.0f, 9.0f, 7.0f, + }); + + // Constructs input data. 
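+    // (With the pooling parameters above, each channel pools to a 3x6 output when padded:
+    // outputWidth = (8 + 3 + 3 + 2 - 3) / 2 = 6 and outputHeight = (13 + 4 - 3) / 4 = 3,
+    // or to a 3x3 output when forceNoPadding is set, since outputWidth = (8 + 2 - 3) / 2 = 3.)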
+ std::vector inputData; + auto negator = [](float f) { return -f; }; + + // First image (two channels where the second channel is the negative of the first one). + inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); + std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); + + // Second image (same as first image). + inputData.insert(inputData.end(), singleChannelData.begin(), singleChannelData.end()); + std::transform(singleChannelData.begin(), singleChannelData.end(), std::back_inserter(inputData), negator); + + auto input = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, inputData)); + + // These were calculated manually. + auto shape(GetTensorShapeAsArray<4>(outputTensorInfo)); + boost::multi_array outputExpected(shape); + if (forceNoPadding) + { + outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 8.0f, 8.0f, 8.0f, + 9.0f, 7.0f, 9.0f, + 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, -3.0f, + -1.0f, 0.0f, 0.0f, + -1.0f, -1.0f, -1.0f, + + 8.0f, 8.0f, 8.0f, + 9.0f, 7.0f, 9.0f, + 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, -3.0f, + -1.0f, 0.0f, 0.0f, + -1.0f, -1.0f, -1.0f + })); + } + else + { + outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 0.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, + 0.0f, 9.0f, 7.0f, 9.0f, 9.0f, 3.0f, + 0.0f, 8.0f, 9.0f, 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, 0.0f, 0.0f,-3.0f, 0.0f, + 0.0f,-1.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f,-1.0f,-1.0f,-1.0f,-1.0f, 0.0f, + + 0.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, + 0.0f, 9.0f, 7.0f, 9.0f, 9.0f, 3.0f, + 0.0f, 8.0f, 9.0f, 9.0f, 9.0f, 9.0f, + + 0.0f, 0.0f, 0.0f, 0.0f,-3.0f, 0.0f, + 0.0f,-1.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f,-1.0f,-1.0f,-1.0f,-1.0f, 0.0f + })); + } + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult SimpleAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. 
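+    // (QuantizedVector presumably applies the usual affine mapping round(v / qScale) + qOffset,
+    // so the default qScale = 1.0f and qOffset = 0 leave these integer-valued inputs unchanged.)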
+ if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + })); + + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0f, 2.5f, 4.0f, + 1.0f, 2.5f, 4.0f, + 1.0f, 2.5f, 4.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult LargeTensorsAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 100; + descriptor.m_StrideX = descriptor.m_StrideY = 5; + descriptor.m_PadLeft = 50; + descriptor.m_PadRight = 50; + descriptor.m_PadTop = 50; + descriptor.m_PadBottom = 50; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 5, 3, 52, 60 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 5, 3, 11, 13 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + std::vector inputVec; + + for (unsigned int i = 0 ; i < inputTensorInfo.GetShape().GetNumElements(); ++i) + { + inputVec.push_back(1); + } + + auto input = MakeTensor(inputTensorInfo, inputVec); + + std::vector outputVec; + + for (unsigned int i = 0 ; i < outputTensorInfo.GetShape().GetNumElements(); ++i) + { + outputVec.push_back(1); + } + + auto outputExpected = MakeTensor(outputTensorInfo, outputVec); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult SimpleL2Pooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0f, 7.0f, 1.0f, 7.0f, + 1.0f, 7.0f, 1.0f, 7.0f, + 1.0f, 7.0f, 1.0f, 7.0f, + 1.0f, 7.0f, 1.0f, 7.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType()); + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 5.0f, 5.0f, + 5.0f, 5.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult L2Pooling2dSize3Stride1TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + 
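+    // (L2 pooling takes the square root of the mean of the squared values in each window:
+    //  in the 2x2 test above, the window {1, 7, 1, 7} gives sqrt(100 / 4) = 5.0f, and here each
+    //  3x3 window such as {2, 1, 5, 1, 2, 2, 5, 4, 1} gives sqrt(81 / 9) = 3.0f.)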
descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 2.0f, + 1.0f, 2.0f, 2.0f, 1.0f, + 5.0f, 4.0f, 1.0f, 5.0f, + 2.0f, 1.0f, 5.0f, 2.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType()); + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 3.0f, 3.0f, + 3.0f, 3.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult L2Pooling2dSize3Stride3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 3; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, armnn::GetDataType()); + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType()); + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 3.0f, 3.0f, 3.0f, + 3.0f, 3.0f, 3.0f, + 3.0f, 3.0f, 3.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult L2Pooling2dSize3Stride4TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 4; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 7, 7 }, armnn::GetDataType()); + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 0.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 0.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 0.0f, 5.0f, 4.0f, 1.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 2.0f, 1.0f, 5.0f, 0.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 0.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 0.0f, 5.0f, 4.0f, 1.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType()); + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 3.0f, 3.0f, + 3.0f, 3.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult L2Pooling2dSize7TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = 
armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 7; + descriptor.m_StrideX = descriptor.m_StrideY = 7; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 7, 7 }, armnn::GetDataType()); + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0f, 0.0f, 2.0f, 0.0f, 3.0f, 0.0f, 4.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 5.0f, 0.0f, 6.0f, 0.0f, 7.0f, 0.0f, + 8.0f, 0.0f, 9.0f, 0.0f, 10.0f, 0.0f, 5.0f, + 0.0f, 5.0f, 0.0f, 2.0f, 0.0f, 1.0f, 1.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 1 }, armnn::GetDataType()); + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 3.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult L2Pooling2dSize9TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 9; + descriptor.m_StrideX = descriptor.m_StrideY = 9; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9 }, armnn::GetDataType()); + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, 2.0f, 1.0f, 5.0f, + 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, 1.0f, 2.0f, 2.0f, + 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, 5.0f, 4.0f, 1.0f, + })); + + armnn::TensorInfo outputTensorInfo({ 1, 1, 1, 1 }, armnn::GetDataType()); + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 3.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult AsymmetricNonSquarePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::TensorInfo inputTensorInfo({ 1, 1, 1, 3 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType()); + + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = 2; + descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 2; + descriptor.m_PadRight = 0; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 2; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + // Construct input data. + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0f, 3.0f, 4.0f, + })); + + // These were calculated manually. 
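+    // (With PadLeft = 2 and StrideX = 2 the first pooling window along x covers only padding, which is
+    //  why its expected value is 0.0f; the second window covers the real values {1.0f, 3.0f} and takes
+    //  their max, 3.0f. Both output rows see the same single real input row, hence 0, 3, 0, 3.)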
+ auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 0.0f, 3.0f, 0.0f, 3.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult ComparePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + armnn::PoolingAlgorithm poolingType, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + const unsigned int inputWidth = 16; + const unsigned int inputHeight = 32; + const unsigned int channelCount = 2; + const unsigned int batchSize = 5; + + const unsigned int poolSize = 3; + const unsigned int strideX = 2; + const unsigned int strideY = 4; + const unsigned int padX = 0; + const unsigned int padY = 0; + + const unsigned int outputWidth = (inputWidth + 2 * padX + strideX - poolSize) / strideX; + const unsigned int outputHeight = (inputHeight + 2 * padY + strideY - poolSize) / strideY; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { batchSize, channelCount, inputHeight, inputWidth }; + unsigned int outputShape[] = { batchSize, channelCount, outputHeight, outputWidth }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType()); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + boost::multi_array input = MakeRandomTensor(inputTensorInfo, 81715); + + LayerTestResult comparisonResult(outputTensorInfo); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::Pooling2dQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + data.m_Parameters.m_PoolType = poolingType; + data.m_Parameters.m_PoolWidth = poolSize; + data.m_Parameters.m_PoolHeight = poolSize; + data.m_Parameters.m_StrideX = strideX; + data.m_Parameters.m_StrideY = strideY; + data.m_Parameters.m_PadLeft = padX; + data.m_Parameters.m_PadRight = padX; + data.m_Parameters.m_PadTop = padY; + data.m_Parameters.m_PadBottom = padY; + data.m_Parameters.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + + std::unique_ptr outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + // Don't execute if Pooling is not supported, as an exception will be raised. 
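+    // (This comparison test runs the same random input through workloadFactory and refWorkloadFactory,
+    //  storing one result in output and the other in outputExpected so the two can be compared.
+    //  If the configuration is unsupported, the early return below leaves supported == false and the
+    //  comparison is presumably skipped by the calling test macro.)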
+ armnn::Compute compute = workloadFactory.GetCompute(); + const size_t reasonIfUnsupportedMaxLen = 255; + char reasonIfUnsupported[reasonIfUnsupportedMaxLen+1]; + comparisonResult.supported = armnn::IsPooling2dSupported(compute, inputTensorInfo, outputTensorInfo, + data.m_Parameters, + reasonIfUnsupported, reasonIfUnsupportedMaxLen); + if (!comparisonResult.supported) + { + return comparisonResult; + } + + armnn::Pooling2dQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr workload = workloadFactory.CreatePooling2d(data, info); + std::unique_ptr workloadRef = refWorkloadFactory.CreatePooling2d(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]); + + workload->Execute(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get()); + CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get()); + + return comparisonResult; +} + +// +// Tests max pooling with the following parameters: +// +// Pooling size: 2x2 +// Stride: (2,2) +// input size: 4x4 +// channels: 1 +// batch size: 1 +// +template +LayerTestResult SimpleMaxPooling2dSize2x2Stride2x2TestCommon(armnn::IWorkloadFactory& workloadFactory, + bool forceNoPadding, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = 2; + descriptor.m_StrideY = 2; + descriptor.m_PadLeft = descriptor.m_PadRight = forceNoPadding ? 0 : 3; + descriptor.m_PadTop = descriptor.m_PadBottom = 0; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor; + descriptor.m_PaddingMethod = armnn::PaddingMethod::Exclude; + + unsigned int inputWidth = 4; + unsigned int inputHeight = 4; + unsigned int outputWidth = + (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) / + descriptor.m_StrideX; + unsigned int outputHeight = + (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) / + descriptor.m_StrideY; + unsigned int channels = 1; + unsigned int batchSize = 1; + + std::vector inputData = { + 510.0f, 222.0f, 780.0f, 654.0f, + 141.0f, 276.0f, 15.0f, 546.0f, + 303.0f, 618.0f, 582.0f, 339.0f, + 438.0f, 564.0f, 573.0f, 402.0f + }; + + // Note that left and right edges will be 0.f, due to the 2x2 max pooling only accessing zeros here. + std::vector expectedOutputDataWithPadding = { + 0.0f, 510.0f, 780.0f, 654.0f, 0.0f, + 0.0f, 438.0f, 618.0f, 402.0f, 0.0f + }; + + std::vector expectedOutputDataNoPadding = { + 510.0f, 780.0f, + 618.0f, 582.0f + }; + + armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType()); + + // Scale and offset should match input - we're just calculating maximum values. + armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. 
+    if(armnn::IsQuantizedType())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    auto input = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, inputData));
+
+    auto outputExpected = MakeTensor(outputTensorInfo,
+        forceNoPadding ? QuantizedVector(qScale, qOffset, expectedOutputDataNoPadding) :
+        QuantizedVector(qScale, qOffset, expectedOutputDataWithPadding));
+
+    return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected);
+}
+
+//
+// Tests average pooling with the following parameters:
+//
+// Pooling size: 3x2
+// Stride: (2,2)
+// input size: 3x2
+// channels: 1
+// batch size: 1
+//
+template
+LayerTestResult IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    bool forceNoPadding,
+    float qScale = 1.0f,
+    int32_t qOffset = 0)
+{
+    armnn::Pooling2dDescriptor descriptor;
+    descriptor.m_PoolType = armnn::PoolingAlgorithm::Average;
+    descriptor.m_PoolWidth = 3;
+    descriptor.m_PoolHeight = 2;
+    descriptor.m_StrideX = 2;
+    descriptor.m_StrideY = 2;
+    descriptor.m_PadLeft = (forceNoPadding) ? 0 : 1;
+    descriptor.m_PadRight = descriptor.m_PadLeft;
+    descriptor.m_PadTop = 0;
+    descriptor.m_PadBottom = 0;
+    descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Floor;
+    descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue;
+
+    unsigned int inputWidth = 3;
+    unsigned int inputHeight = 2;
+    unsigned int outputWidth =
+        (inputWidth + descriptor.m_PadLeft + descriptor.m_PadRight + descriptor.m_StrideX - descriptor.m_PoolWidth) /
+        descriptor.m_StrideX;
+    unsigned int outputHeight =
+        (inputHeight + descriptor.m_PadTop + descriptor.m_PadBottom + descriptor.m_StrideY - descriptor.m_PoolHeight) /
+        descriptor.m_StrideY;
+    unsigned int channels = 1;
+    unsigned int batchSize = 1;
+
+    std::vector inputData = {
+        3.0f, 6.0f, 9.0f,
+        12.0f, 15.0f, 18.0f,
+    };
+
+    std::vector expectedOutputDataWithPadding = {
+        6.0f, 8.0f,
+    };
+
+    std::vector expectedOutputDataNoPadding = {
+        10.5f,
+    };
+
+    armnn::TensorInfo inputTensorInfo({ batchSize, channels, inputHeight, inputWidth }, armnn::GetDataType());
+
+    // Scale and offset should match input - we're just calculating average values.
+    armnn::TensorInfo outputTensorInfo({ batchSize, channels, outputHeight, outputWidth }, armnn::GetDataType());
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    auto input = MakeTensor(inputTensorInfo, QuantizedVector(qScale, qOffset, inputData));
+
+    auto outputExpected = MakeTensor(outputTensorInfo,
+        forceNoPadding ?
QuantizedVector(qScale, qOffset, expectedOutputDataNoPadding) : + QuantizedVector(qScale, qOffset, expectedOutputDataWithPadding)); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + + +template +LayerTestResult IgnorePaddingSimpleMaxPooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + -1.0f, -2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + })); + + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + -1.0f, 3.0f, 4.0f, + 1.0f, 3.0f, 4.0f, + 1.0f, 2.0f, -4.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult IgnorePaddingMaxPooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Max; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + -1.0f, -2.0f, 3.0f, 4.0f, + -1.0f, -2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + 1.0f, 2.0f, -3.0f, -4.0f, + })); + + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + -1.0f, 3.0f, 4.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 4.0f, + 2.0f, 3.0f, 4.0f, 4.0f, + 2.0f, 2.0f, 2.0f, -3.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult IgnorePaddingSimpleAveragePooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 12.0f, 20.0f, 32.0f, 40.0f, + 12.0f, 20.0f, 32.0f, 40.0f, + 12.0f, 20.0f, 32.0f, 40.0f, + 12.0f, 20.0f, 32.0f, 40.0f, + })); + + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 3.0f, 13.0f, 10.0f, + 6.0f, 26.0f, 20.0f, + 3.0f, 13.0f, 10.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 0; + descriptor.m_PadRight = 0; + descriptor.m_PadTop = 0; + descriptor.m_PadBottom = 0; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + descriptor.m_OutputShapeRounding = armnn::OutputShapeRounding::Ceiling; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4}, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 2, 2 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. 
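+    // (With no padding, a 3x3 pool and stride 2, Floor rounding would give a 1x1 output from the 4x4
+    //  input; OutputShapeRounding::Ceiling is what makes it 2x2. The second window along each axis is
+    //  clipped at the border, so it averages only the values it actually covers - e.g. the columns
+    //  {3.0f, 4.0f}, giving the expected 3.5f.)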
+ if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + })); + + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 2.0f, 3.5f, + 2.0f, 3.5f + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult IgnorePaddingAveragePooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::Average; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 9.0f, 27.0f, 18.0f, 36.0f, + 18.0f, 9.0f, 18.0f, 9.0f, + 27.0f, 18.0f, 9.0f, 27.0f, + 9.0f, 27.0f, 9.0f, 18.0f, + })); + + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 7.0f, 11.0f, 13.0f, 9.0f, + 12.0f, 17.0f, 19.0f, 13.0f, + 12.0f, 16.0f, 16.0f, 10.0f, + 9.0f, 11.0f, 12.0f, 7.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult IgnorePaddingSimpleL2Pooling2dTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 2; + descriptor.m_StrideX = descriptor.m_StrideY = 2; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. 
+ if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 2.0f, 4.0f, 8.0f, 16.0f, + 4.0f, 2.0f, 2.0f, 4.0f, + 8.0f, 2.0f, 4.0f, 2.0f, + 16.0f, 2.0f, 2.0f, 8.0f, + })); + + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0f, 4.4721f, 8.0f, + 4.4721f, 2.6457f, 2.236f, + 8.0f, 1.4142f, 4.0f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} + +template +LayerTestResult IgnorePaddingL2Pooling2dSize3TestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 1.0f, + int32_t qOffset = 0) +{ + armnn::Pooling2dDescriptor descriptor; + descriptor.m_PoolType = armnn::PoolingAlgorithm::L2; + descriptor.m_PoolWidth = descriptor.m_PoolHeight = 3; + descriptor.m_StrideX = descriptor.m_StrideY = 1; + descriptor.m_PadLeft = 1; + descriptor.m_PadRight = 1; + descriptor.m_PadTop = 1; + descriptor.m_PadBottom = 1; + descriptor.m_PaddingMethod = armnn::PaddingMethod::IgnoreValue; + + armnn::TensorInfo inputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + } + + auto input = MakeTensor(inputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + 1.0f, 2.0f, 3.0f, 4.0f, + })); + + auto outputExpected = MakeTensor(outputTensorInfo, + QuantizedVector(qScale, qOffset, { + 1.0540f, 1.7638f, 2.5385f, 2.3570f, + 1.2909f, 2.1602f, 3.1091f, 2.8867f, + 1.2909f, 2.1602f, 3.1091f, 2.8867f, + 1.0540f, 1.7638f, 2.5385f, 2.3570f, + })); + + return SimplePooling2dTestImpl(workloadFactory, descriptor, qScale, qOffset, input, outputExpected); +} diff --git a/src/backends/test/QuantizeHelper.hpp b/src/backends/test/QuantizeHelper.hpp new file mode 100644 index 0000000000..bb4e561d59 --- /dev/null +++ b/src/backends/test/QuantizeHelper.hpp @@ -0,0 +1,91 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include + +#include +#include +#include +#include + +template +struct SelectiveQuantizer +{ + static T Quantize(float value, float scale, int32_t offset) + { + return armnn::Quantize(value, scale, offset); + } + + static float Dequantize(T value, float scale, int32_t offset) + { + return armnn::Dequantize(value, scale, offset); + } +}; + +template +struct SelectiveQuantizer +{ + static T Quantize(float value, float scale, int32_t offset) + { + boost::ignore_unused(scale, offset); + return value; + } + + static float Dequantize(T value, float scale, int32_t offset) + { + boost::ignore_unused(scale, offset); + return value; + } +}; + +template +T SelectiveQuantize(float value, float scale, int32_t offset) +{ + return SelectiveQuantizer()>::Quantize(value, scale, offset); +}; + +template +float SelectiveDequantize(T value, float scale, int32_t offset) +{ + return SelectiveQuantizer()>::Dequantize(value, scale, offset); +}; + +template +struct IsFloatingPointIterator +{ + static constexpr bool value=std::is_floating_point::value_type>::value; +}; + +template ::value, int>::type=0 // Makes sure fp iterator is valid. +> +std::vector QuantizedVector(float qScale, int32_t qOffset, FloatIt first, FloatIt last) +{ + std::vector quantized; + quantized.reserve(boost::numeric_cast(std::distance(first, last))); + + for (auto it = first; it != last; ++it) + { + auto f = *it; + T q =SelectiveQuantize(f, qScale, qOffset); + quantized.push_back(q); + } + + return quantized; +} + +template +std::vector QuantizedVector(float qScale, int32_t qOffset, const std::vector& array) +{ + return QuantizedVector(qScale, qOffset, array.begin(), array.end()); +} + +template +std::vector QuantizedVector(float qScale, int32_t qOffset, std::initializer_list array) +{ + return QuantizedVector(qScale, qOffset, array.begin(), array.end()); +} diff --git a/src/backends/test/Reference.cpp b/src/backends/test/Reference.cpp new file mode 100644 index 0000000000..62786a9ec4 --- /dev/null +++ b/src/backends/test/Reference.cpp @@ -0,0 +1,253 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#include + +#include "LayerTests.hpp" +#include "test/TensorHelpers.hpp" + +#include "backends/RefWorkloadFactory.hpp" + +#include "test/UnitTests.hpp" + +BOOST_AUTO_TEST_SUITE(Compute_Reference) +using FactoryType = armnn::RefWorkloadFactory; + +// ============================================================================ +// UNIT tests + +// Convolution +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5, SimpleConvolution2d3x5Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5Uint8, SimpleConvolution2d3x5Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedConvolutionUint8, SimpleConvolution2d3x5Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution1dUint8, Convolution1dUint8Test, true) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3, SimpleConvolution2d3x3Test, true) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false) + +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPaddingLargerThanHalfKernelSize, + Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest) +ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest) + +// Depthwise Convolution +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2d, DepthwiseConvolution2dTest, true) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2d, DepthwiseConvolution2dTest, false) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dUint8, DepthwiseConvolution2dUint8Test, false) + +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true) +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true) + +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true) +ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false) + +// Pooling +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2, SimpleMaxPooling2dSize2x2Stride2x2Test, false) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2Uint8, SimpleMaxPooling2dSize2x2Stride2x2Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, false) +ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, false) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2d, IgnorePaddingSimpleMaxPooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleMaxPooling2dUint8, IgnorePaddingSimpleMaxPooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3, IgnorePaddingMaxPooling2dSize3Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingMaxPooling2dSize3Uint8, IgnorePaddingMaxPooling2dSize3Uint8Test) + +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2d, IgnorePaddingSimpleAveragePooling2dTest) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dUint8, IgnorePaddingSimpleAveragePooling2dUint8Test) +ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPadding, 
+    IgnorePaddingSimpleAveragePooling2dNoPaddingTest)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleAveragePooling2dNoPaddingUint8,
+    IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3, IgnorePaddingAveragePooling2dSize3Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3Uint8, IgnorePaddingAveragePooling2dSize3Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2d, IgnorePaddingSimpleL2Pooling2dTest)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingSimpleL2Pooling2dUint8, IgnorePaddingSimpleL2Pooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3, IgnorePaddingL2Pooling2dSize3Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingL2Pooling2dSize3Uint8, IgnorePaddingL2Pooling2dSize3Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2d, SimpleAveragePooling2dTest)
+ARMNN_AUTO_TEST_CASE(SimpleAveragePooling2dUint8, SimpleAveragePooling2dUint8Test)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2,
+    IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, false)
+ARMNN_AUTO_TEST_CASE(IgnorePaddingAveragePooling2dSize3x2Stride2x2NoPadding,
+    IgnorePaddingAveragePooling2dSize3x2Stride2x2Test, true)
+
+ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2d, LargeTensorsAveragePooling2dTest)
+ARMNN_AUTO_TEST_CASE(LargeTensorsAveragePooling2dUint8, LargeTensorsAveragePooling2dUint8Test)
+
+ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2d, SimpleL2Pooling2dTest)
+ARMNN_AUTO_TEST_CASE(SimpleL2Pooling2dUint8, SimpleL2Pooling2dUint8Test)
+
+ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7, L2Pooling2dSize7Test)
+ARMNN_AUTO_TEST_CASE(L2Pooling2dSize7Uint8, L2Pooling2dSize7Uint8Test)
+
+ARMNN_AUTO_TEST_CASE(AsymmNonSquarePooling2d, AsymmetricNonSquarePooling2dTest)
+ARMNN_AUTO_TEST_CASE(AsymmNonSquarePooling2dUint8, AsymmetricNonSquarePooling2dUint8Test)
+
+// Activation
+ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest)
+ARMNN_AUTO_TEST_CASE(ConstantLinearActivationUint8, ConstantLinearActivationUint8Test)
+
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcross, SimpleNormalizationAcrossTest)
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationWithin, SimpleNormalizationWithinTest)
+
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f)
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f)
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1Uint8, SimpleSoftmaxUint8Test, 1.0f)
+ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2Uint8, SimpleSoftmaxUint8Test, 2.0f)
+
+ARMNN_AUTO_TEST_CASE(SimpleSigmoid, SimpleSigmoidTest)
+ARMNN_AUTO_TEST_CASE(SimpleSigmoidUint8, SimpleSigmoidUint8Test)
+
+ARMNN_AUTO_TEST_CASE(ReLu1, BoundedReLuUpperAndLowerBoundTest)
+ARMNN_AUTO_TEST_CASE(ReLu6, BoundedReLuUpperBoundOnlyTest)
+ARMNN_AUTO_TEST_CASE(ReLu1Uint8, BoundedReLuUint8UpperAndLowerBoundTest)
+ARMNN_AUTO_TEST_CASE(ReLu6Uint8, BoundedReLuUint8UpperBoundOnlyTest)
+
+// Fully Connected
+ARMNN_AUTO_TEST_CASE(SimpleFullyConnected, FullyConnectedFloat32Test, false, false)
+ARMNN_AUTO_TEST_CASE(FullyConnectedUint8, FullyConnectedUint8Test, false)
+ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithBias, FullyConnectedFloat32Test, true, false)
+ARMNN_AUTO_TEST_CASE(FullyConnectedBiasedUint8, FullyConnectedUint8Test, true)
+ARMNN_AUTO_TEST_CASE(SimpleFullyConnectedWithTranspose, FullyConnectedFloat32Test, false, true)
+
+ARMNN_AUTO_TEST_CASE(FullyConnectedLarge, FullyConnectedLargeTest, false)
+ARMNN_AUTO_TEST_CASE(FullyConnectedLargeTransposed, FullyConnectedLargeTest, true)
+
+// Splitter
+ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest)
+ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test)
+
+ARMNN_AUTO_TEST_CASE(CopyViaSplitter, CopyViaSplitterTest)
+ARMNN_AUTO_TEST_CASE(CopyViaSplitterUint8, CopyViaSplitterUint8Test)
+
+// Merger
+ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest)
+ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test)
+
+// Add
+ARMNN_AUTO_TEST_CASE(SimpleAdd, AdditionTest)
+ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(AddBroadcast, AdditionBroadcastTest)
+
+ARMNN_AUTO_TEST_CASE(AdditionUint8, AdditionUint8Test)
+ARMNN_AUTO_TEST_CASE(AddBroadcastUint8, AdditionBroadcastUint8Test)
+ARMNN_AUTO_TEST_CASE(AddBroadcast1ElementUint8, AdditionBroadcast1ElementUint8Test)
+
+// Sub
+ARMNN_AUTO_TEST_CASE(SimpleSub, SubtractionTest)
+ARMNN_AUTO_TEST_CASE(SubBroadcast1Element, SubtractionBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(SubBroadcast, SubtractionBroadcastTest)
+
+ARMNN_AUTO_TEST_CASE(SubtractionUint8, SubtractionUint8Test)
+ARMNN_AUTO_TEST_CASE(SubBroadcastUint8, SubtractionBroadcastUint8Test)
+ARMNN_AUTO_TEST_CASE(SubBroadcast1ElementUint8, SubtractionBroadcast1ElementUint8Test)
+
+// Div
+ARMNN_AUTO_TEST_CASE(SimpleDivision, DivisionTest)
+ARMNN_AUTO_TEST_CASE(DivisionByZero, DivisionByZeroTest)
+ARMNN_AUTO_TEST_CASE(DivisionBroadcast1Element, DivisionBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(DivisionBroadcast1DVector, DivisionBroadcast1DVectorTest)
+// NOTE: division by zero for quantized div needs more attention
+// see IVGCVSW-1849
+ARMNN_AUTO_TEST_CASE(DivisionUint8, DivisionUint8Test)
+ARMNN_AUTO_TEST_CASE(DivisionUint8Broadcast1Element, DivisionBroadcast1ElementUint8Test)
+ARMNN_AUTO_TEST_CASE(DivisionUint8Broadcast1DVector, DivisionBroadcast1DVectorUint8Test)
+
+// Mul
+ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationUint8, MultiplicationUint8Test)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1ElementUint8, MultiplicationBroadcast1ElementUint8Test)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadcast1DVectorUint8Test)
+
+// Batch Norm
+ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
+ARMNN_AUTO_TEST_CASE(BatchNormUint8, BatchNormUint8Test)
+
+// Resize Bilinear
+ARMNN_AUTO_TEST_CASE(SimpleResizeBilinear, SimpleResizeBilinearTest)
+ARMNN_AUTO_TEST_CASE(SimpleResizeBilinearUint8, SimpleResizeBilinearUint8Test)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearNop, ResizeBilinearNopTest)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearNopUint8, ResizeBilinearNopUint8Test)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMin, ResizeBilinearSqMinTest)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearSqMinUint8, ResizeBilinearSqMinUint8Test)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearMin, ResizeBilinearMinTest)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearMinUint8, ResizeBilinearMinUint8Test)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearMag, ResizeBilinearMagTest)
+ARMNN_AUTO_TEST_CASE(ResizeBilinearMagUint8, ResizeBilinearMagUint8Test)
+
+// Fake Quantization
+ARMNN_AUTO_TEST_CASE(FakeQuantization, FakeQuantizationTest)
+
+// L2 Normalization
+ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest)
+ARMNN_AUTO_TEST_CASE(L2Normalization2d, L2Normalization2dTest)
+ARMNN_AUTO_TEST_CASE(L2Normalization3d, L2Normalization3dTest)
+ARMNN_AUTO_TEST_CASE(L2Normalization4d, L2Normalization4dTest)
+
+// Constant
+ARMNN_AUTO_TEST_CASE(Constant, ConstantTest)
+ARMNN_AUTO_TEST_CASE(ConstantUint8, ConstantUint8Test) + +// Concat +ARMNN_AUTO_TEST_CASE(Concatenation1d, Concatenation1dTest) +ARMNN_AUTO_TEST_CASE(Concatenation1dUint8, Concatenation1dUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0, Concatenation2dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0Uint8, Concatenation2dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1, Concatenation2dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1Uint8, Concatenation2dDim1Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDims, Concatenation2dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim0DiffInputDimsUint8, Concatenation2dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDims, Concatenation2dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation2dDim1DiffInputDimsUint8, Concatenation2dDim1DiffInputDimsUint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0, Concatenation3dDim0Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0Uint8, Concatenation3dDim0Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1, Concatenation3dDim1Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1Uint8, Concatenation3dDim1Uint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2, Concatenation3dDim2Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2Uint8, Concatenation3dDim2Uint8Test) + +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDims, Concatenation3dDim0DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim0DiffInputDimsUint8, Concatenation3dDim0DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDims, Concatenation3dDim1DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim1DiffInputDimsUint8, Concatenation3dDim1DiffInputDimsUint8Test) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDims, Concatenation3dDim2DiffInputDimsTest) +ARMNN_AUTO_TEST_CASE(Concatenation3dDim2DiffInputDimsUint8, Concatenation3dDim2DiffInputDimsUint8Test) + +// Floor +ARMNN_AUTO_TEST_CASE(SimpleFloor, SimpleFloorTest) + +// Reshape +ARMNN_AUTO_TEST_CASE(SimpleReshapeFloat32, SimpleReshapeFloat32Test) +ARMNN_AUTO_TEST_CASE(SimpleReshapeUint8, SimpleReshapeUint8Test) + +// Permute +ARMNN_AUTO_TEST_CASE(SimplePermuteFloat32, SimplePermuteFloat32Test) +ARMNN_AUTO_TEST_CASE(SimplePermuteUint8, SimplePermuteUint8Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet1, PermuteFloat32ValueSet1Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test) +ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test) + +// Convert from Float16 to Float32 +ARMNN_AUTO_TEST_CASE(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Test) +// Convert from Float32 to Float16 +ARMNN_AUTO_TEST_CASE(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test) + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/ReshapeTestImpl.hpp b/src/backends/test/ReshapeTestImpl.hpp new file mode 100644 index 0000000000..5d32d9d3a6 --- /dev/null +++ b/src/backends/test/ReshapeTestImpl.hpp @@ -0,0 +1,177 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +template +LayerTestResult SimpleReshapeTestImpl( + armnn::IWorkloadFactory& workloadFactory, + armnn::TensorInfo inputTensorInfo, + armnn::TensorInfo outputTensorInfo, + const std::vector& inputData, + const std::vector& outputExpectedData) +{ + auto input = MakeTensor(inputTensorInfo, inputData); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, outputExpectedData); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ReshapeQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateReshape(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult SimpleReshapeFloat32Test(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 2, 2, 3, 3 }; + unsigned int outputShape[] = { 2, 2, 9, 1 }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + std::vector input = std::vector( + { + 0.0f, 1.0f, 2.0f, + 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, + + 9.0f, 10.0f, 11.0f, + 12.0f, 13.0f, 14.0f, + 15.0f, 16.0f, 17.0f, + + 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, + 24.0f, 25.0f, 26.0f, + + 27.0f, 28.0f, 29.0f, + 30.0f, 31.0f, 32.0f, + 33.0f, 34.0f, 35.0f, + }); + + std::vector outputExpected = std::vector( + { + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + + 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, + + 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + }); + + return SimpleReshapeTestImpl(workloadFactory, inputTensorInfo, outputTensorInfo, input, outputExpected); +} + +LayerTestResult SimpleFloorTest(armnn::IWorkloadFactory& workloadFactory) +{ + const armnn::TensorInfo inputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32); + const armnn::TensorInfo outputTensorInfo(inputTensorInfo); + + auto input = MakeTensor(inputTensorInfo, + { -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f, + 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f }); + + LayerTestResult ret(outputTensorInfo); + ret.outputExpected = MakeTensor(outputTensorInfo, + { -38.0f, -16.0f, -9.0f, -2.0f, -2.0f, -2.0f, -1.0f, -1.0f, 0.0f, + 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 2.0f, 8.0f, 15.0f, 37.0f }); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::FloorQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + 
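+    // (Each of these tests follows the same basic pattern: fill in a queue descriptor and WorkloadInfo,
+    //  create the workload from the factory, allocate the tensor handles, copy the input in with
+    //  CopyDataToITensorHandle, Execute(), and copy the result back out for comparison against
+    //  outputExpected.)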
std::unique_ptr workload = workloadFactory.CreateFloor(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get()); + + return ret; +} + +LayerTestResult SimpleReshapeUint8Test(armnn::IWorkloadFactory& workloadFactory) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 2, 2, 3, 3 }; + unsigned int outputShape[] = { 2, 2, 9, 1 }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::QuantisedAsymm8); + inputTensorInfo.SetQuantizationScale(1.0f); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::QuantisedAsymm8); + outputTensorInfo.SetQuantizationScale(1.0f); + + std::vector input = std::vector( + { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8, + + 9, 10, 11, + 12, 13, 14, + 15, 16, 17, + + 18, 19, 20, + 21, 22, 23, + 24, 25, 26, + + 27, 28, 29, + 30, 31, 32, + 33, 34, 35, + }); + + std::vector outputExpected = std::vector( + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, + + 9, 10, 11, 12, 13, 14, 15, 16, 17, + + 18, 19, 20, 21, 22, 23, 24, 25, 26, + + 27, 28, 29, 30, 31, 32, 33, 34, 35, + }); + + return SimpleReshapeTestImpl(workloadFactory, inputTensorInfo, outputTensorInfo, input, outputExpected); +} diff --git a/src/backends/test/SoftmaxTestImpl.hpp b/src/backends/test/SoftmaxTestImpl.hpp new file mode 100644 index 0000000000..5bc13fa21c --- /dev/null +++ b/src/backends/test/SoftmaxTestImpl.hpp @@ -0,0 +1,153 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include + +#include "test/TensorHelpers.hpp" +#include "QuantizeHelper.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include + +template +LayerTestResult SimpleSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFactory, float beta) +{ + using std::exp; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 2, 4 }; + + inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType()); + float qScale = 1.f / 256.f; + int qOffset = 0; + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + + outputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType()); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + + LayerTestResult ret(outputTensorInfo); + + // Each row is independently softmax'd. 
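+    // (softmax(x_i) = exp(beta * (x_i - max_j x_j)) / sum_k exp(beta * (x_k - max_j x_j)); subtracting
+    //  the row maximum does not change the result but keeps the exponentials well conditioned, and it
+    //  matches the x0/x1 reference values computed by hand further down.)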
+ auto input = MakeTensor(inputTensorInfo, std::vector( + QuantizedVector(qScale, 0, { + 0.f, 1.f, 0.f, 0.f, + .5f, 0.f, 0.f, 0.f, + }))); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::SoftmaxQueueDescriptor data; + data.m_Parameters.m_Beta = beta; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateSoftmax(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + + float x0[4] = { exp((0.f - 1.0f) * beta), exp((1.0f - 1.0f) * beta), + exp((0.0f - 1.0f) * beta), exp((0.0f - 1.0f) * beta) }; + float sum0 = x0[0] + x0[1] + x0[2] + x0[3]; + float x1[4] = { exp((0.5f - 0.5f) * beta), exp((0.0f - 0.5f) * beta), + exp((0.0f - 0.5f) * beta), exp((0.0f - 0.5f) * beta) }; + float sum1 = x1[0] + x1[1] + x1[2] + x1[3]; + + ret.outputExpected = MakeTensor(outputTensorInfo, std::vector( + QuantizedVector(qScale, qOffset, { + x0[0] / sum0, x0[1] / sum0, x0[2] / sum0, x0[3] / sum0, + x1[0] / sum1, x1[1] / sum1, x1[2] / sum1, x1[3] / sum1 + }))); + + return ret; +} + +template +LayerTestResult CompareSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFactory, + armnn::IWorkloadFactory& refWorkloadFactory, + float beta) +{ + + const int batchSize = 20; + const int channels = 30; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { batchSize, channels }; + + inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType()); + outputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType()); + float qScale = 1.f / 256.f; + int qOffset = 0; + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo.SetQuantizationScale(qScale); + outputTensorInfo.SetQuantizationOffset(qOffset); + + + LayerTestResult ret(outputTensorInfo); + auto input = MakeRandomTensor(inputTensorInfo, 0xF00D, 0.0f, 1.0f); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo); + + armnn::SoftmaxQueueDescriptor data; + data.m_Parameters.m_Beta = beta; + + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo); + std::unique_ptr inputHandleRef = refWorkloadFactory.CreateTensorHandle(inputTensorInfo); + + + armnn::SoftmaxQueueDescriptor refData = data; + armnn::WorkloadInfo refInfo = info; + SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get()); + SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get()); + + std::unique_ptr workload = workloadFactory.CreateSoftmax(data, info); + std::unique_ptr workloadRef = refWorkloadFactory.CreateSoftmax(refData, refInfo); + + outputHandleRef->Allocate(); + inputHandleRef->Allocate(); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0]); + 
CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0]); + + workloadFactory.Finalize(); + workload->Execute(); + refWorkloadFactory.Finalize(); + workloadRef->Execute(); + + CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get()); + CopyDataFromITensorHandle(&ret.outputExpected[0][0], outputHandleRef.get()); + + return ret; +} diff --git a/src/backends/test/SplitterTestImpl.hpp b/src/backends/test/SplitterTestImpl.hpp new file mode 100644 index 0000000000..5dcc412d0e --- /dev/null +++ b/src/backends/test/SplitterTestImpl.hpp @@ -0,0 +1,307 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include + +#include "test/TensorHelpers.hpp" + +#include "backends/CpuTensorHandle.hpp" +#include "backends/WorkloadFactory.hpp" + +#include "backends/test/QuantizeHelper.hpp" + + +template +std::vector> SplitterTestCommon(armnn::IWorkloadFactory& workloadFactory, + float qScale = 0.0f, + int32_t qOffset = 0) +{ + unsigned int inputWidth = 5; + unsigned int inputHeight = 6; + unsigned int inputChannels = 3; + + // NOTE: Compute Library imposes a restriction that the x and y dimension (input height and width) + // cannot be split. + // For the reasons for this, see first comment on https://jira.arm.com/browse/IVGCVSW-1239 + // + // This test has therefore been recast to split the channels, then split the resulting subtensor. + + // To take channel 0 of original output + // and channel 0 and channel 1 of the split subtensor. + unsigned int outputWidth1 = inputWidth; + unsigned int outputHeight1 = inputHeight; + unsigned int outputChannels1 = 1; + + // To take channel 1 and 2 of the original output. + unsigned int outputWidth2 = inputWidth; + unsigned int outputHeight2 = inputHeight; + unsigned int outputChannels2 = 2; + + + // Define the tensor descriptors. + armnn::TensorInfo inputTensorInfo({ inputChannels, inputHeight, inputWidth }, armnn::GetDataType()); + + // Outputs of the original split. + armnn::TensorInfo outputTensorInfo1({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo2({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType()); + + // Outputs of the subsequent subtensor split. + armnn::TensorInfo outputTensorInfo3({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType()); + armnn::TensorInfo outputTensorInfo4({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType()); + + // Set quantization parameters if the requested type is a quantized type. + // The quantization doesn't really matter as the splitter operator doesn't dequantize/quantize. 
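+    // (Overall structure of this test:
+    //    input   [3, 6, 5] -> output1 [1, 6, 5] (channel 0)     + output2 [2, 6, 5] (channels 1 and 2)
+    //    output2 [2, 6, 5] -> output3 [1, 6, 5] (its channel 0) + output4 [1, 6, 5] (its channel 1)
+    //  with the outputs created as sub-tensors of their parents when the factory supports them.)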
+ if(armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(qScale); + inputTensorInfo.SetQuantizationOffset(qOffset); + outputTensorInfo1.SetQuantizationScale(qScale); + outputTensorInfo1.SetQuantizationOffset(qOffset); + outputTensorInfo2.SetQuantizationScale(qScale); + outputTensorInfo2.SetQuantizationOffset(qOffset); + outputTensorInfo3.SetQuantizationScale(qScale); + outputTensorInfo3.SetQuantizationOffset(qOffset); + outputTensorInfo4.SetQuantizationScale(qScale); + outputTensorInfo4.SetQuantizationOffset(qOffset); + } + + LayerTestResult ret1(outputTensorInfo1); + LayerTestResult ret2(outputTensorInfo2); + LayerTestResult ret3(outputTensorInfo3); + LayerTestResult ret4(outputTensorInfo4); + + auto input = MakeTensor(inputTensorInfo, std::vector( + QuantizedVector(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + }) + )); + + // Channel 0 of the original input. + ret1.outputExpected = MakeTensor(outputTensorInfo1, std::vector( + QuantizedVector(qScale, qOffset, { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + }) + )); + + // Channel 1 & 2 of the original input. + ret2.outputExpected = MakeTensor(outputTensorInfo2, std::vector( + QuantizedVector(qScale, qOffset, { + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + }) + )); + + // Channel 0 of return 2 (i.e. channels 1 and 2 of the original input). + ret3.outputExpected = MakeTensor(outputTensorInfo3, std::vector( + QuantizedVector(qScale, qOffset, { + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + }) + )); + + // Channel 1 of return 2. + ret4.outputExpected = MakeTensor(outputTensorInfo4, std::vector( + QuantizedVector(qScale, qOffset, { + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + }) + )); + + // NOTE: as a corollary of the splitting of x and y restriction the x and y values of the view origins + // have to be zero, the co-ordinates are as per the tensor info above channels, height/y, width/x + // note that under the hood the compute engine reverses these i.e. 
its coordinate system is x, y, channels. + std::vector wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of output[0]. + armnn::SplitterQueueDescriptor::ViewOrigin window1(wOrigin1); + + std::vector wOrigin2 = {1, 0, 0}; //Extent of the window is defined by size of output[1]. + armnn::SplitterQueueDescriptor::ViewOrigin window2(wOrigin2); + + std::vector wOrigin3 = {0, 0, 0}; //Extent of the window is defined by size of output[2]. + armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3); + + std::vector wOrigin4 = {1, 0, 0}; //Extent of the window is defined by size of output[3]. + armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4); + + bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo); + + std::unique_ptr outputHandle1 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo1.GetShape(), wOrigin1.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo1); + + std::unique_ptr outputHandle2 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo2.GetShape(), wOrigin2.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo2); + + std::unique_ptr outputHandle3 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle2, outputTensorInfo3.GetShape(), wOrigin3.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo3); + + std::unique_ptr outputHandle4 = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*outputHandle2, outputTensorInfo4.GetShape(), wOrigin4.data()) : + workloadFactory.CreateTensorHandle(outputTensorInfo4); + + // Do the first split + armnn::SplitterQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, outputTensorInfo1, outputHandle1.get()); + AddOutputToWorkload(data, info, outputTensorInfo2, outputHandle2.get()); + + data.m_ViewOrigins.push_back(window1); + data.m_ViewOrigins.push_back(window2); + + std::unique_ptr workload = workloadFactory.CreateSplitter(data, info); + + inputHandle->Allocate(); + outputHandle1->Allocate(); + outputHandle2->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]); + + workload->Execute(); + + CopyDataFromITensorHandle(&ret1.output[0][0][0], outputHandle1.get()); + CopyDataFromITensorHandle(&ret2.output[0][0][0], outputHandle2.get()); + +// // Do the second split. 
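    // The second split takes outputHandle2 (channels 1 and 2 of the original input) as its
    // input and splits it again along the channel dimension: window3 selects channel 0 and
    // window4 selects channel 1 of that two-channel tensor. For a densely packed
    // {channels, height, width} buffer a view origin boils down to a flat element offset into
    // the parent allocation; a rough sketch, ignoring any backend padding or strides
    // (illustrative only, not the Arm Compute Library calculation):
    //
    //     std::size_t ViewOffset(const unsigned int origin[3], unsigned int height, unsigned int width)
    //     {
    //         return static_cast<std::size_t>(origin[0]) * height * width  // whole channels skipped
    //              + static_cast<std::size_t>(origin[1]) * width           // whole rows skipped
    //              + origin[2];                                            // elements within the row
    //     }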
+ armnn::SplitterQueueDescriptor data2; + armnn::WorkloadInfo info2; + AddInputToWorkload(data2, info2, outputTensorInfo2, outputHandle2.get()); + AddOutputToWorkload(data2, info2, outputTensorInfo3, outputHandle3.get()); + AddOutputToWorkload(data2, info2, outputTensorInfo4, outputHandle4.get()); + + data2.m_ViewOrigins.push_back(window3); + data2.m_ViewOrigins.push_back(window4); + + std::unique_ptr workload2 = workloadFactory.CreateSplitter(data2, info2); + + outputHandle3->Allocate(); + outputHandle4->Allocate(); + + workload2->Execute(); + + CopyDataFromITensorHandle(&ret3.output[0][0][0], outputHandle3.get()); + CopyDataFromITensorHandle(&ret4.output[0][0][0], outputHandle4.get()); + + std::vector> ret = {ret1, ret2, ret3, ret4,}; + + return ret; +} + + +template +LayerTestResult CopyViaSplitterTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset) +{ + const armnn::TensorInfo tensorInfo({ 3, 6, 5 }, armnn::GetDataType()); + auto input = MakeTensor(tensorInfo, QuantizedVector(qScale, qOffset, + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, + 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, + 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, + 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, + 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + + 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, + 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, + 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, + 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, + 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, + 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, + 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, + 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, + 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, + 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, + })); + + std::vector origin = { 0, 0, 0 }; + armnn::SplitterQueueDescriptor::ViewOrigin window(origin); + + const bool subTensorsSupported = workloadFactory.SupportsSubTensors(); + + std::unique_ptr inputHandle = workloadFactory.CreateTensorHandle(tensorInfo); + + std::unique_ptr outputHandle = + subTensorsSupported ? + workloadFactory.CreateSubTensorHandle(*inputHandle, tensorInfo.GetShape(), origin.data()) : + workloadFactory.CreateTensorHandle(tensorInfo); + + armnn::SplitterQueueDescriptor data; + armnn::WorkloadInfo info; + AddInputToWorkload(data, info, tensorInfo, inputHandle.get()); + AddOutputToWorkload(data, info, tensorInfo, outputHandle.get()); + + data.m_ViewOrigins.push_back(window); + + std::unique_ptr workload = workloadFactory.CreateSplitter(data, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]); + + workload->Execute(); + + LayerTestResult ret(tensorInfo); + CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get()); + ret.outputExpected = input; + + return ret; +} diff --git a/src/backends/test/TensorCopyUtils.cpp b/src/backends/test/TensorCopyUtils.cpp new file mode 100644 index 0000000000..dc5864b285 --- /dev/null +++ b/src/backends/test/TensorCopyUtils.cpp @@ -0,0 +1,159 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include +#include +#include +#include + +#include "TensorCopyUtils.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include "backends/ClTensorHandle.hpp" +#endif + +#if ARMCOMPUTENEON_ENABLED +#include "backends/NeonTensorHandle.hpp" +#endif + +#if ARMCOMPUTECLENABLED || ARMCOMPUTENEON_ENABLED +#include "backends/ArmComputeTensorUtils.hpp" +#endif + +#include "backends/CpuTensorHandle.hpp" + +void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem) +{ + switch (tensorHandle->GetType()) + { + case armnn::ITensorHandle::Cpu: + { + auto handle = boost::polymorphic_downcast(tensorHandle); + memcpy(handle->GetTensor(), mem, handle->GetTensorInfo().GetNumBytes()); + break; + } +#ifdef ARMCOMPUTECL_ENABLED + case armnn::ITensorHandle::CL: + { + using armnn::armcomputetensorutils::CopyArmComputeITensorData; + auto handle = boost::polymorphic_downcast(tensorHandle); + handle->Map(true); + switch(handle->GetDataType()) + { + case arm_compute::DataType::F32: + CopyArmComputeITensorData(static_cast(mem), handle->GetTensor()); + break; + case arm_compute::DataType::QASYMM8: + CopyArmComputeITensorData(static_cast(mem), handle->GetTensor()); + break; + case arm_compute::DataType::F16: + CopyArmComputeITensorData(static_cast(mem), handle->GetTensor()); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + handle->Unmap(); + break; + } +#endif +#if ARMCOMPUTENEON_ENABLED + case armnn::ITensorHandle::Neon: + { + using armnn::armcomputetensorutils::CopyArmComputeITensorData; + auto handle = boost::polymorphic_downcast(tensorHandle); + switch (handle->GetDataType()) + { + case arm_compute::DataType::F32: + CopyArmComputeITensorData(static_cast(mem), handle->GetTensor()); + break; + case arm_compute::DataType::QASYMM8: + CopyArmComputeITensorData(static_cast(mem), handle->GetTensor()); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + break; + } +#endif + default: + { + throw armnn::UnimplementedException(); + } + } +} + +void CopyDataFromITensorHandle(void* mem, const armnn::ITensorHandle* tensorHandle) +{ + switch (tensorHandle->GetType()) + { + case armnn::ITensorHandle::Cpu: + { + auto handle = boost::polymorphic_downcast(tensorHandle); + memcpy(mem, handle->GetTensor(), handle->GetTensorInfo().GetNumBytes()); + break; + } +#ifdef ARMCOMPUTECL_ENABLED + case armnn::ITensorHandle::CL: + { + using armnn::armcomputetensorutils::CopyArmComputeITensorData; + auto handle = boost::polymorphic_downcast(tensorHandle); + const_cast(handle)->Map(true); + switch(handle->GetDataType()) + { + case arm_compute::DataType::F32: + CopyArmComputeITensorData(handle->GetTensor(), static_cast(mem)); + break; + case arm_compute::DataType::QASYMM8: + CopyArmComputeITensorData(handle->GetTensor(), static_cast(mem)); + break; + case arm_compute::DataType::F16: + CopyArmComputeITensorData(handle->GetTensor(), static_cast(mem)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + const_cast(handle)->Unmap(); + break; + } +#endif +#if ARMCOMPUTENEON_ENABLED + case armnn::ITensorHandle::Neon: + { + using armnn::armcomputetensorutils::CopyArmComputeITensorData; + auto handle = boost::polymorphic_downcast(tensorHandle); + switch (handle->GetDataType()) + { + case arm_compute::DataType::F32: + CopyArmComputeITensorData(handle->GetTensor(), static_cast(mem)); + break; + case arm_compute::DataType::QASYMM8: + CopyArmComputeITensorData(handle->GetTensor(), static_cast(mem)); + break; + default: + { + throw 
armnn::UnimplementedException(); + } + } + break; + } +#endif + default: + { + throw armnn::UnimplementedException(); + } + } +} + +void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem) +{ + tensorHandle->Allocate(); + CopyDataToITensorHandle(tensorHandle, mem); +} diff --git a/src/backends/test/TensorCopyUtils.hpp b/src/backends/test/TensorCopyUtils.hpp new file mode 100644 index 0000000000..0cec839903 --- /dev/null +++ b/src/backends/test/TensorCopyUtils.hpp @@ -0,0 +1,14 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "armnn/Tensor.hpp" +#include "backends/ITensorHandle.hpp" + +void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem); + +void CopyDataFromITensorHandle(void* mem, const armnn::ITensorHandle* tensorHandle); + +void AllocateAndCopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem); \ No newline at end of file diff --git a/src/backends/test/WorkloadDataValidation.cpp b/src/backends/test/WorkloadDataValidation.cpp new file mode 100644 index 0000000000..a5cfbd1270 --- /dev/null +++ b/src/backends/test/WorkloadDataValidation.cpp @@ -0,0 +1,471 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include +#include +#include +#include +#include + +#include + +#include "WorkloadTestUtils.hpp" + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(WorkloadInfoValidation) + + + +BOOST_AUTO_TEST_CASE(QueueDescriptor_Validate_WrongNumOfInputsOutputs) +{ + InputQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + //Invalid argument exception is expected, because no inputs and no outputs were defined. + BOOST_CHECK_THROW(RefWorkloadFactory().CreateInput(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(RefPooling2dFloat32Workload_Validate_WrongDimTensor) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {2, 3, 4}; // <- Invalid - input tensor has to be 4D. + unsigned int outputShape[] = {2, 3, 4, 5}; + + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + inputTensorInfo = armnn::TensorInfo(3, inputShape, armnn::DataType::Float32); + + Pooling2dQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + + // Invalid argument exception is expected, input tensor has to be 4D. + BOOST_CHECK_THROW(RefPooling2dFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(SoftmaxQueueDescriptor_Validate_WrongInputHeight) +{ + unsigned int inputHeight = 1; + unsigned int inputWidth = 1; + unsigned int inputChannels = 4; + unsigned int inputNum = 2; + + unsigned int outputChannels = inputChannels; + unsigned int outputHeight = inputHeight + 1; //Makes data invalid - Softmax expects height and width to be 1. 
+ unsigned int outputWidth = inputWidth; + unsigned int outputNum = inputNum; + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + SoftmaxQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + //Invalid argument exception is expected, because height != 1. + BOOST_CHECK_THROW(RefSoftmaxFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(FullyConnectedQueueDescriptor_Validate_RequiredDataMissing) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 2; + + unsigned int outputWidth = 1; + unsigned int outputHeight = 1; + unsigned int outputChannels = 3; + unsigned int outputNum = 2; + + // Define the tensor descriptors. + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + armnn::TensorInfo weightsDesc; + armnn::TensorInfo biasesDesc; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels, outputHeight, outputWidth }; + unsigned int weightsShape[] = { 1, 1, inputChannels, outputChannels }; + unsigned int biasShape[] = { 1, outputChannels, outputHeight, outputWidth }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + weightsDesc = armnn::TensorInfo(4, weightsShape, armnn::DataType::Float32); + biasesDesc = armnn::TensorInfo(4, biasShape, armnn::DataType::Float32); + + FullyConnectedQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + ScopedCpuTensorHandle weightTensor(weightsDesc); + ScopedCpuTensorHandle biasTensor(biasesDesc); + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + invalidData.m_Weight = &weightTensor; + invalidData.m_Bias = &biasTensor; + invalidData.m_Parameters.m_BiasEnabled = true; + invalidData.m_Parameters.m_TransposeWeightMatrix = false; + + + //Invalid argument exception is expected, because not all required fields have been provided. + //In particular inputsData[0], outputsData[0] and weightsData can not be null. + BOOST_CHECK_THROW(RefFullyConnectedFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + + +BOOST_AUTO_TEST_CASE(NormalizationQueueDescriptor_Validate_WrongInputHeight) +{ + constexpr unsigned int inputNum = 5; + constexpr unsigned int inputHeight = 32; + constexpr unsigned int inputWidth = 24; + constexpr unsigned int inputChannels = 3; + + constexpr unsigned int outputNum = inputNum; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputHeight = inputHeight + 1; //Makes data invalid - normalization requires. + //Input and output to have the same dimensions. 
+ constexpr unsigned int outputWidth = inputWidth; + + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + + armnn::NormalizationAlgorithmMethod normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness; + armnn::NormalizationAlgorithmChannel normChannel = armnn::NormalizationAlgorithmChannel::Across; + float alpha = 1.f; + float beta = 1.f; + float kappa = 1.f; + uint32_t normSize = 5; + + NormalizationQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + invalidData.m_Parameters.m_NormChannelType = normChannel; + invalidData.m_Parameters.m_NormMethodType = normMethod; + invalidData.m_Parameters.m_NormSize = normSize; + invalidData.m_Parameters.m_Alpha = alpha; + invalidData.m_Parameters.m_Beta = beta; + invalidData.m_Parameters.m_K = kappa; + + //Invalid argument exception is expected, because input height != output height. + BOOST_CHECK_THROW(RefNormalizationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(SplitterQueueDescriptor_Validate_WrongWindow) +{ + constexpr unsigned int inputNum = 1; + constexpr unsigned int inputHeight = 32; + constexpr unsigned int inputWidth = 24; + constexpr unsigned int inputChannels = 3; + + constexpr unsigned int outputNum = inputNum; + constexpr unsigned int outputChannels = inputChannels; + constexpr unsigned int outputHeight = 18; + constexpr unsigned int outputWidth = inputWidth; + + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + SplitterQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // Invalid, since it has only 3 dimensions while the input tensor is 4d. + std::vector wOrigin = {0, 0, 0}; + armnn::SplitterQueueDescriptor::ViewOrigin window(wOrigin); + invalidData.m_ViewOrigins.push_back(window); + + BOOST_TEST_INFO("Invalid argument exception is expected, because split window dimensionality does not " + "match input."); + BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + // Invalid, since window extends past the boundary of input tensor. 
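    // Concretely: the view below starts at height index 15 and the output view is 18 rows tall,
    // so it would need rows 15..32 of an input that only has rows 0..31 (15 + 18 = 33 > 32).
    // The general bounds rule the validation enforces is
    //
    //     viewOrigin[d] + viewShape[d] <= inputShape[d]   for every dimension d.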
+ std::vector wOrigin3 = {0, 0, 15, 0}; + armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3); + invalidData.m_ViewOrigins[0] = window3; + BOOST_TEST_INFO("Invalid argument exception is expected (wOrigin3[2]+ outputHeight > inputHeight"); + BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + + std::vector wOrigin4 = {0, 0, 0, 0}; + armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4); + invalidData.m_ViewOrigins[0] = window4; + + std::vector wOrigin5 = {1, 16, 20, 2}; + armnn::SplitterQueueDescriptor::ViewOrigin window5(wOrigin4); + invalidData.m_ViewOrigins.push_back(window5); + + BOOST_TEST_INFO("Invalid exception due to number of split windows not matching number of outputs."); + BOOST_CHECK_THROW(RefSplitterFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + + +BOOST_AUTO_TEST_CASE(MergerQueueDescriptor_Validate_WrongWindow) +{ + constexpr unsigned int inputNum = 1; + constexpr unsigned int inputChannels = 3; + constexpr unsigned int inputHeight = 32; + constexpr unsigned int inputWidth = 24; + + constexpr unsigned int outputNum = 1; + constexpr unsigned int outputChannels = 3; + constexpr unsigned int outputHeight = 32; + constexpr unsigned int outputWidth = 24; + + + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = {inputNum, inputChannels, inputHeight, inputWidth}; + unsigned int outputShape[] = {outputNum, outputChannels, outputHeight, outputWidth}; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + MergerQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // Invalid, since it has only 3 dimensions while the input tensor is 4d. + std::vector wOrigin = {0, 0, 0}; + armnn::MergerQueueDescriptor::ViewOrigin window(wOrigin); + invalidData.m_ViewOrigins.push_back(window); + + BOOST_TEST_INFO("Invalid argument exception is expected, because merge window dimensionality does not " + "match input."); + BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + // Invalid, since window extends past the boundary of output tensor. 
+ std::vector wOrigin3 = {0, 0, 15, 0}; + armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3); + invalidData.m_ViewOrigins[0] = window3; + BOOST_TEST_INFO("Invalid argument exception is expected (wOrigin3[2]+ inputHeight > outputHeight"); + BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + + std::vector wOrigin4 = {0, 0, 0, 0}; + armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4); + invalidData.m_ViewOrigins[0] = window4; + + std::vector wOrigin5 = {1, 16, 20, 2}; + armnn::MergerQueueDescriptor::ViewOrigin window5(wOrigin4); + invalidData.m_ViewOrigins.push_back(window5); + + BOOST_TEST_INFO("Invalid exception due to number of merge windows not matching number of inputs."); + BOOST_CHECK_THROW(RefMergerFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputNumbers) +{ + armnn::TensorInfo input1TensorInfo; + armnn::TensorInfo input2TensorInfo; + armnn::TensorInfo input3TensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape[] = {1, 1, 1, 1}; + + input1TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + input2TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + input3TensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32); + + AdditionQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // Too few inputs. + BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + + AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr); + + // Correct. + BOOST_CHECK_NO_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo)); + + AddInputToWorkload(invalidData, invalidInfo, input3TensorInfo, nullptr); + + // Too many inputs. + BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_CASE(AdditionQueueDescriptor_Validate_InputShapes) +{ + armnn::TensorInfo input1TensorInfo; + armnn::TensorInfo input2TensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int shape1[] = {1, 1, 2, 1}; + unsigned int shape2[] = {1, 1, 3, 2}; + + // Incompatible shapes even with broadcasting. + { + input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); + input2TensorInfo = armnn::TensorInfo(4, shape2, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); + + AdditionQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + } + + // Output size not compatible with input sizes. 
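    // The shapes in the block above fail the usual broadcast rule, under which two dimensions
    // are compatible only if they are equal or one of them is 1: dimension 2 is 2 vs 3, so
    // {1, 1, 2, 1} and {1, 1, 3, 2} cannot be broadcast together. In the block below the two
    // inputs are identical, but the declared output shape no longer matches the broadcast
    // result, which should be the element-wise maximum of the input dimensions. A minimal
    // sketch of the compatibility check (illustrative only, needs <algorithm> if extended to
    // compute the output shape):
    //
    //     bool BroadcastCompatible(const unsigned int* a, const unsigned int* b, unsigned int rank)
    //     {
    //         for (unsigned int i = 0; i < rank; ++i)
    //         {
    //             if (a[i] != b[i] && a[i] != 1 && b[i] != 1)
    //             {
    //                 return false;
    //             }
    //         }
    //         return true;
    //     }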
+ { + input1TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); + input2TensorInfo = armnn::TensorInfo(4, shape1, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, shape2, armnn::DataType::Float32); + + AdditionQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input2TensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // Output differs. + BOOST_CHECK_THROW(RefAdditionFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + } +} + +BOOST_AUTO_TEST_CASE(MultiplicationQueueDescriptor_Validate_InputTensorDimensionMismatch) +{ + armnn::TensorInfo input0TensorInfo; + armnn::TensorInfo input1TensorInfo; + armnn::TensorInfo outputTensorInfo; + + constexpr unsigned int input0Shape[] = { 2, 2, 4, 4 }; + constexpr std::size_t dimensionCount = std::extent::value; + + // Checks dimension consistency for input tensors. + for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex) + { + unsigned int input1Shape[dimensionCount]; + for (unsigned int i = 0; i < dimensionCount; ++i) + { + input1Shape[i] = input0Shape[i]; + } + + ++input1Shape[dimIndex]; + + input0TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32); + input1TensorInfo = armnn::TensorInfo(dimensionCount, input1Shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32); + + MultiplicationQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input0TensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); + + BOOST_CHECK_THROW(RefMultiplicationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + } + + // Checks dimension consistency for input and output tensors. + for (unsigned int dimIndex = 0; dimIndex < dimensionCount; ++dimIndex) + { + unsigned int outputShape[dimensionCount]; + for (unsigned int i = 0; i < dimensionCount; ++i) + { + outputShape[i] = input0Shape[i]; + } + + ++outputShape[dimIndex]; + + input0TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32); + input1TensorInfo = armnn::TensorInfo(dimensionCount, input0Shape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(dimensionCount, outputShape, armnn::DataType::Float32); + + MultiplicationQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input0TensorInfo, nullptr); + AddInputToWorkload(invalidData, invalidInfo, input1TensorInfo, nullptr); + + BOOST_CHECK_THROW(RefMultiplicationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); + } +} + +BOOST_AUTO_TEST_CASE(ReshapeQueueDescriptor_Validate_MismatchingNumElements) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + // The input and output shapes should have the same number of elements, but these don't. 
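    // 1 * 1 * 2 * 3 = 6 elements in versus 1 * 1 * 1 * 2 = 2 elements out; Reshape only relabels
    // the shape of the same underlying data, so the element counts must match exactly.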
+ unsigned int inputShape[] = { 1, 1, 2, 3 }; + unsigned int outputShape[] = { 1, 1, 1, 2 }; + + inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32); + + ReshapeQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + // InvalidArgumentException is expected, because the number of elements don't match. + BOOST_CHECK_THROW(RefReshapeFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException); +} + + +BOOST_AUTO_TEST_CASE(LstmQueueDescriptor_Validate) +{ + armnn::TensorInfo inputTensorInfo; + armnn::TensorInfo outputTensorInfo; + + unsigned int inputShape[] = { 1, 2 }; + unsigned int outputShape[] = { 1 }; + + inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::DataType::Float32); + outputTensorInfo = armnn::TensorInfo(1, outputShape, armnn::DataType::Float32); + + LstmQueueDescriptor invalidData; + WorkloadInfo invalidInfo; + + AddInputToWorkload(invalidData, invalidInfo, inputTensorInfo, nullptr); + AddOutputToWorkload(invalidData, invalidInfo, outputTensorInfo, nullptr); + + BOOST_CHECK_THROW(invalidData.Validate(invalidInfo), armnn::InvalidArgumentException); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/test/WorkloadTestUtils.hpp b/src/backends/test/WorkloadTestUtils.hpp new file mode 100644 index 0000000000..a7b75309f7 --- /dev/null +++ b/src/backends/test/WorkloadTestUtils.hpp @@ -0,0 +1,55 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include + +namespace armnn +{ +class ITensorHandle; +} + +template +void AddInputToWorkload(QueueDescriptor& descriptor, + armnn::WorkloadInfo& info, + const armnn::TensorInfo& tensorInfo, + armnn::ITensorHandle* tensorHandle) +{ + descriptor.m_Inputs.push_back(tensorHandle); + info.m_InputTensorInfos.push_back(tensorInfo); +} + +template +void AddOutputToWorkload(QueueDescriptor& descriptor, + armnn::WorkloadInfo& info, + const armnn::TensorInfo& tensorInfo, + armnn::ITensorHandle* tensorHandle) +{ + descriptor.m_Outputs.push_back(tensorHandle); + info.m_OutputTensorInfos.push_back(tensorInfo); +} + +template +void SetWorkloadInput(QueueDescriptor& descriptor, + armnn::WorkloadInfo& info, + unsigned int index, + const armnn::TensorInfo& tensorInfo, + armnn::ITensorHandle* tensorHandle) +{ + descriptor.m_Inputs[index] = tensorHandle; + info.m_InputTensorInfos[index] = tensorInfo; +} + +template +void SetWorkloadOutput(QueueDescriptor& descriptor, + armnn::WorkloadInfo& info, + unsigned int index, + const armnn::TensorInfo& tensorInfo, + armnn::ITensorHandle* tensorHandle) +{ + descriptor.m_Outputs[index] = tensorHandle; + info.m_OutputTensorInfos[index] = tensorInfo; +} \ No newline at end of file -- cgit v1.2.1
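The Add*/Set* helpers in WorkloadTestUtils.hpp are what let CompareSoftmaxTestImpl above copy a
fully wired descriptor and then re-point the copy at the reference backend's handles. A
self-contained sketch of that pattern with stand-in types, so it compiles without Arm NN (the
struct names here are illustrative, not the library's declarations):

#include <cassert>
#include <vector>

struct TensorInfo   { unsigned int numElements = 0; };
struct TensorHandle { int backendId = 0; };

struct WorkloadInfo
{
    std::vector<TensorInfo> m_InputTensorInfos;
    std::vector<TensorInfo> m_OutputTensorInfos;
};

struct QueueDescriptor
{
    std::vector<TensorHandle*> m_Inputs;
    std::vector<TensorHandle*> m_Outputs;
};

// Append a new input binding: the descriptor carries the handle, the info the matching TensorInfo.
template <typename Descriptor>
void AddInputToWorkload(Descriptor& descriptor, WorkloadInfo& info,
                        const TensorInfo& tensorInfo, TensorHandle* tensorHandle)
{
    descriptor.m_Inputs.push_back(tensorHandle);
    info.m_InputTensorInfos.push_back(tensorInfo);
}

// Overwrite an existing binding in place, as done for the copied reference descriptor.
template <typename Descriptor>
void SetWorkloadInput(Descriptor& descriptor, WorkloadInfo& info, unsigned int index,
                      const TensorInfo& tensorInfo, TensorHandle* tensorHandle)
{
    descriptor.m_Inputs[index] = tensorHandle;
    info.m_InputTensorInfos[index] = tensorInfo;
}

int main()
{
    TensorHandle testHandle{1};
    TensorHandle refHandle{2};
    TensorInfo   info2x3{6};

    QueueDescriptor data;
    WorkloadInfo    info;
    AddInputToWorkload(data, info, info2x3, &testHandle);        // wire up the backend under test

    QueueDescriptor refData = data;                              // copy the wiring...
    WorkloadInfo    refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, info2x3, &refHandle);  // ...then swap in the reference handle

    assert(data.m_Inputs[0] == &testHandle);
    assert(refData.m_Inputs[0] == &refHandle);
    return 0;
}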