From ac42efd972b7d03da17f057b2ceaaac5d6e96b1a Mon Sep 17 00:00:00 2001 From: David Beck Date: Wed, 26 Sep 2018 17:41:13 +0100 Subject: IVGCVSW-1900 : CL backend folder structure * moving backends/ClWorkloads to backends/cl * and moving pure Cl workload related code to backends/cl/workloads Change-Id: I019a3c6b4da5e7a23074bf03fb057e63199ad129 --- Android.mk | 3 - src/armnn/Layer.cpp | 4 +- src/armnn/Layer.hpp | 6 +- src/armnn/LayerSupport.cpp | 2 +- src/armnn/LoadedNetwork.hpp | 2 +- src/armnn/Network.cpp | 10 +- src/armnn/Runtime.hpp | 8 +- src/armnn/layers/MeanLayer.cpp | 8 +- src/armnn/layers/PadLayer.cpp | 6 +- src/armnn/memory/BaseMemoryManager.hpp | 12 +- src/armnn/optimizations/ConvertConstants.hpp | 6 +- src/armnn/test/CreateWorkload.hpp | 7 +- src/armnn/test/CreateWorkloadClNeon.hpp | 2 +- src/armnn/test/EndToEndTest.cpp | 8 +- src/armnn/test/FP16SupportTest.cpp | 16 +- src/armnn/test/GraphTests.cpp | 17 +- src/armnn/test/JsonPrinterTests.cpp | 12 +- src/armnn/test/NetworkTests.cpp | 2 +- src/armnn/test/OpenClTimerTest.cpp | 16 +- src/armnn/test/OptimizerTests.cpp | 10 +- src/armnn/test/TensorHelpers.hpp | 4 +- src/backends/CMakeLists.txt | 14 - src/backends/ClContextControl.cpp | 235 ---------- src/backends/ClContextControl.hpp | 62 --- src/backends/ClLayerSupport.cpp | 478 ------------------- src/backends/ClLayerSupport.hpp | 164 ------- src/backends/ClTensorHandle.hpp | 141 ------ src/backends/ClWorkloadFactory.cpp | 506 --------------------- src/backends/ClWorkloadFactory.hpp | 139 ------ src/backends/ClWorkloads/CMakeLists.txt | 91 ---- .../ClWorkloads/ClActivationFloatWorkload.cpp | 56 --- .../ClWorkloads/ClActivationFloatWorkload.hpp | 29 -- .../ClWorkloads/ClActivationUint8Workload.cpp | 44 -- .../ClWorkloads/ClActivationUint8Workload.hpp | 29 -- src/backends/ClWorkloads/ClAdditionWorkload.cpp | 66 --- src/backends/ClWorkloads/ClAdditionWorkload.hpp | 31 -- .../ClWorkloads/ClBaseConstantWorkload.cpp | 64 --- .../ClWorkloads/ClBaseConstantWorkload.hpp | 30 -- src/backends/ClWorkloads/ClBaseMergerWorkload.hpp | 28 -- .../ClWorkloads/ClBaseSplitterWorkload.hpp | 28 -- .../ClBatchNormalizationFloatWorkload.cpp | 96 ---- .../ClBatchNormalizationFloatWorkload.hpp | 46 -- .../ClWorkloads/ClConstantFloatWorkload.cpp | 18 - .../ClWorkloads/ClConstantFloatWorkload.hpp | 20 - .../ClWorkloads/ClConstantUint8Workload.cpp | 18 - .../ClWorkloads/ClConstantUint8Workload.hpp | 20 - .../ClWorkloads/ClConvertFp16ToFp32Workload.cpp | 66 --- .../ClWorkloads/ClConvertFp16ToFp32Workload.hpp | 30 -- .../ClWorkloads/ClConvertFp32ToFp16Workload.cpp | 66 --- .../ClWorkloads/ClConvertFp32ToFp16Workload.hpp | 30 -- .../ClWorkloads/ClConvolution2dBaseWorkload.cpp | 48 -- .../ClWorkloads/ClConvolution2dBaseWorkload.hpp | 24 - .../ClWorkloads/ClConvolution2dFloatWorkload.cpp | 81 ---- .../ClWorkloads/ClConvolution2dFloatWorkload.hpp | 35 -- .../ClWorkloads/ClConvolution2dUint8Workload.cpp | 81 ---- .../ClWorkloads/ClConvolution2dUint8Workload.hpp | 35 -- .../ClDepthwiseConvolutionBaseWorkload.cpp | 125 ----- .../ClDepthwiseConvolutionBaseWorkload.hpp | 40 -- .../ClDepthwiseConvolutionFloatWorkload.cpp | 39 -- .../ClDepthwiseConvolutionFloatWorkload.hpp | 26 -- .../ClDepthwiseConvolutionUint8Workload.cpp | 40 -- .../ClDepthwiseConvolutionUint8Workload.hpp | 23 - .../ClWorkloads/ClDivisionFloatWorkload.cpp | 48 -- .../ClWorkloads/ClDivisionFloatWorkload.hpp | 32 -- src/backends/ClWorkloads/ClFloorFloatWorkload.cpp | 31 -- src/backends/ClWorkloads/ClFloorFloatWorkload.hpp | 30 -- 
.../ClWorkloads/ClFullyConnectedWorkload.cpp | 96 ---- .../ClWorkloads/ClFullyConnectedWorkload.hpp | 43 -- .../ClWorkloads/ClL2NormalizationFloatWorkload.cpp | 50 -- .../ClWorkloads/ClL2NormalizationFloatWorkload.hpp | 35 -- src/backends/ClWorkloads/ClLstmFloatWorkload.cpp | 391 ---------------- src/backends/ClWorkloads/ClLstmFloatWorkload.hpp | 68 --- src/backends/ClWorkloads/ClMergerFloatWorkload.cpp | 20 - src/backends/ClWorkloads/ClMergerFloatWorkload.hpp | 22 - src/backends/ClWorkloads/ClMergerUint8Workload.cpp | 19 - src/backends/ClWorkloads/ClMergerUint8Workload.hpp | 21 - .../ClWorkloads/ClMultiplicationFloatWorkload.cpp | 60 --- .../ClWorkloads/ClMultiplicationFloatWorkload.hpp | 34 -- .../ClWorkloads/ClNormalizationFloatWorkload.cpp | 51 --- .../ClWorkloads/ClNormalizationFloatWorkload.hpp | 29 -- src/backends/ClWorkloads/ClPadWorkload.cpp | 62 --- src/backends/ClWorkloads/ClPadWorkload.hpp | 32 -- src/backends/ClWorkloads/ClPermuteWorkload.cpp | 56 --- src/backends/ClWorkloads/ClPermuteWorkload.hpp | 42 -- .../ClWorkloads/ClPooling2dBaseWorkload.cpp | 47 -- .../ClWorkloads/ClPooling2dBaseWorkload.hpp | 33 -- .../ClWorkloads/ClPooling2dFloatWorkload.cpp | 26 -- .../ClWorkloads/ClPooling2dFloatWorkload.hpp | 22 - .../ClWorkloads/ClPooling2dUint8Workload.cpp | 27 -- .../ClWorkloads/ClPooling2dUint8Workload.hpp | 25 - .../ClWorkloads/ClReshapeFloatWorkload.cpp | 33 -- .../ClWorkloads/ClReshapeFloatWorkload.hpp | 28 -- .../ClWorkloads/ClReshapeUint8Workload.cpp | 31 -- .../ClWorkloads/ClReshapeUint8Workload.hpp | 29 -- .../ClWorkloads/ClResizeBilinearFloatWorkload.cpp | 38 -- .../ClWorkloads/ClResizeBilinearFloatWorkload.hpp | 25 - src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp | 30 -- src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp | 17 - .../ClWorkloads/ClSoftmaxFloatWorkload.cpp | 33 -- .../ClWorkloads/ClSoftmaxFloatWorkload.hpp | 30 -- .../ClWorkloads/ClSoftmaxUint8Workload.cpp | 43 -- .../ClWorkloads/ClSoftmaxUint8Workload.hpp | 31 -- .../ClWorkloads/ClSplitterFloatWorkload.cpp | 19 - .../ClWorkloads/ClSplitterFloatWorkload.hpp | 20 - .../ClWorkloads/ClSplitterUint8Workload.cpp | 19 - .../ClWorkloads/ClSplitterUint8Workload.hpp | 21 - src/backends/ClWorkloads/ClSubtractionWorkload.cpp | 66 --- src/backends/ClWorkloads/ClSubtractionWorkload.hpp | 31 -- src/backends/ClWorkloads/ClWorkloadUtils.hpp | 63 --- src/backends/ClWorkloads/backend.cmake | 11 - src/backends/ClWorkloads/backend.mk | 49 -- src/backends/MemCopyWorkload.cpp | 4 +- src/backends/MemCopyWorkload.hpp | 2 +- src/backends/OutputHandler.cpp | 6 +- src/backends/OutputHandler.hpp | 10 +- src/backends/WorkloadFactory.cpp | 2 +- src/backends/cl/CMakeLists.txt | 22 + src/backends/cl/ClContextControl.cpp | 235 ++++++++++ src/backends/cl/ClContextControl.hpp | 62 +++ src/backends/cl/ClLayerSupport.cpp | 478 +++++++++++++++++++ src/backends/cl/ClLayerSupport.hpp | 164 +++++++ src/backends/cl/ClTensorHandle.hpp | 141 ++++++ src/backends/cl/ClWorkloadFactory.cpp | 506 +++++++++++++++++++++ src/backends/cl/ClWorkloadFactory.hpp | 139 ++++++ src/backends/cl/backend.cmake | 13 + src/backends/cl/backend.mk | 51 +++ src/backends/cl/test/CMakeLists.txt | 0 src/backends/cl/workloads/CMakeLists.txt | 92 ++++ .../cl/workloads/ClActivationFloatWorkload.cpp | 56 +++ .../cl/workloads/ClActivationFloatWorkload.hpp | 29 ++ .../cl/workloads/ClActivationUint8Workload.cpp | 44 ++ .../cl/workloads/ClActivationUint8Workload.hpp | 29 ++ src/backends/cl/workloads/ClAdditionWorkload.cpp | 66 +++ 
src/backends/cl/workloads/ClAdditionWorkload.hpp | 31 ++ .../cl/workloads/ClBaseConstantWorkload.cpp | 64 +++ .../cl/workloads/ClBaseConstantWorkload.hpp | 30 ++ src/backends/cl/workloads/ClBaseMergerWorkload.hpp | 28 ++ .../cl/workloads/ClBaseSplitterWorkload.hpp | 28 ++ .../ClBatchNormalizationFloatWorkload.cpp | 96 ++++ .../ClBatchNormalizationFloatWorkload.hpp | 46 ++ .../cl/workloads/ClConstantFloatWorkload.cpp | 18 + .../cl/workloads/ClConstantFloatWorkload.hpp | 20 + .../cl/workloads/ClConstantUint8Workload.cpp | 18 + .../cl/workloads/ClConstantUint8Workload.hpp | 20 + .../cl/workloads/ClConvertFp16ToFp32Workload.cpp | 66 +++ .../cl/workloads/ClConvertFp16ToFp32Workload.hpp | 30 ++ .../cl/workloads/ClConvertFp32ToFp16Workload.cpp | 66 +++ .../cl/workloads/ClConvertFp32ToFp16Workload.hpp | 30 ++ .../cl/workloads/ClConvolution2dBaseWorkload.cpp | 48 ++ .../cl/workloads/ClConvolution2dBaseWorkload.hpp | 24 + .../cl/workloads/ClConvolution2dFloatWorkload.cpp | 81 ++++ .../cl/workloads/ClConvolution2dFloatWorkload.hpp | 35 ++ .../cl/workloads/ClConvolution2dUint8Workload.cpp | 81 ++++ .../cl/workloads/ClConvolution2dUint8Workload.hpp | 35 ++ .../ClDepthwiseConvolutionBaseWorkload.cpp | 125 +++++ .../ClDepthwiseConvolutionBaseWorkload.hpp | 40 ++ .../ClDepthwiseConvolutionFloatWorkload.cpp | 39 ++ .../ClDepthwiseConvolutionFloatWorkload.hpp | 26 ++ .../ClDepthwiseConvolutionUint8Workload.cpp | 40 ++ .../ClDepthwiseConvolutionUint8Workload.hpp | 23 + .../cl/workloads/ClDivisionFloatWorkload.cpp | 48 ++ .../cl/workloads/ClDivisionFloatWorkload.hpp | 32 ++ src/backends/cl/workloads/ClFloorFloatWorkload.cpp | 31 ++ src/backends/cl/workloads/ClFloorFloatWorkload.hpp | 30 ++ .../cl/workloads/ClFullyConnectedWorkload.cpp | 96 ++++ .../cl/workloads/ClFullyConnectedWorkload.hpp | 43 ++ .../workloads/ClL2NormalizationFloatWorkload.cpp | 50 ++ .../workloads/ClL2NormalizationFloatWorkload.hpp | 35 ++ src/backends/cl/workloads/ClLstmFloatWorkload.cpp | 391 ++++++++++++++++ src/backends/cl/workloads/ClLstmFloatWorkload.hpp | 68 +++ .../cl/workloads/ClMergerFloatWorkload.cpp | 20 + .../cl/workloads/ClMergerFloatWorkload.hpp | 22 + .../cl/workloads/ClMergerUint8Workload.cpp | 19 + .../cl/workloads/ClMergerUint8Workload.hpp | 21 + .../cl/workloads/ClMultiplicationFloatWorkload.cpp | 60 +++ .../cl/workloads/ClMultiplicationFloatWorkload.hpp | 34 ++ .../cl/workloads/ClNormalizationFloatWorkload.cpp | 51 +++ .../cl/workloads/ClNormalizationFloatWorkload.hpp | 29 ++ src/backends/cl/workloads/ClPadWorkload.cpp | 63 +++ src/backends/cl/workloads/ClPadWorkload.hpp | 32 ++ src/backends/cl/workloads/ClPermuteWorkload.cpp | 56 +++ src/backends/cl/workloads/ClPermuteWorkload.hpp | 42 ++ .../cl/workloads/ClPooling2dBaseWorkload.cpp | 47 ++ .../cl/workloads/ClPooling2dBaseWorkload.hpp | 33 ++ .../cl/workloads/ClPooling2dFloatWorkload.cpp | 26 ++ .../cl/workloads/ClPooling2dFloatWorkload.hpp | 22 + .../cl/workloads/ClPooling2dUint8Workload.cpp | 27 ++ .../cl/workloads/ClPooling2dUint8Workload.hpp | 25 + .../cl/workloads/ClReshapeFloatWorkload.cpp | 33 ++ .../cl/workloads/ClReshapeFloatWorkload.hpp | 28 ++ .../cl/workloads/ClReshapeUint8Workload.cpp | 31 ++ .../cl/workloads/ClReshapeUint8Workload.hpp | 29 ++ .../cl/workloads/ClResizeBilinearFloatWorkload.cpp | 38 ++ .../cl/workloads/ClResizeBilinearFloatWorkload.hpp | 25 + .../cl/workloads/ClSoftmaxBaseWorkload.cpp | 30 ++ .../cl/workloads/ClSoftmaxBaseWorkload.hpp | 17 + .../cl/workloads/ClSoftmaxFloatWorkload.cpp | 33 ++ .../cl/workloads/ClSoftmaxFloatWorkload.hpp | 
30 ++ .../cl/workloads/ClSoftmaxUint8Workload.cpp | 43 ++ .../cl/workloads/ClSoftmaxUint8Workload.hpp | 31 ++ .../cl/workloads/ClSplitterFloatWorkload.cpp | 19 + .../cl/workloads/ClSplitterFloatWorkload.hpp | 20 + .../cl/workloads/ClSplitterUint8Workload.cpp | 19 + .../cl/workloads/ClSplitterUint8Workload.hpp | 21 + .../cl/workloads/ClSubtractionWorkload.cpp | 66 +++ .../cl/workloads/ClSubtractionWorkload.hpp | 31 ++ src/backends/cl/workloads/ClWorkloadUtils.hpp | 63 +++ src/backends/cl/workloads/ClWorkloads.hpp | 41 ++ .../workloads/NeonConvertFp16ToFp32Workload.cpp | 6 +- .../workloads/NeonConvertFp32ToFp16Workload.cpp | 8 +- src/backends/reference/workloads/Merger.hpp | 3 +- .../workloads/RefActivationFloat32Workload.hpp | 2 +- .../workloads/RefActivationUint8Workload.hpp | 4 +- .../reference/workloads/RefArithmeticWorkload.hpp | 6 +- .../workloads/RefBaseConstantWorkload.hpp | 4 +- .../RefBatchNormalizationFloat32Workload.hpp | 4 +- .../RefBatchNormalizationUint8Workload.hpp | 4 +- .../workloads/RefConvertFp16ToFp32Workload.hpp | 4 +- .../workloads/RefConvertFp32ToFp16Workload.hpp | 4 +- .../workloads/RefConvolution2dFloat32Workload.hpp | 4 +- .../workloads/RefConvolution2dUint8Workload.hpp | 4 +- .../RefDepthwiseConvolution2dFloat32Workload.hpp | 4 +- .../RefDepthwiseConvolution2dUint8Workload.hpp | 4 +- .../RefFakeQuantizationFloat32Workload.hpp | 4 +- .../workloads/RefFloorFloat32Workload.hpp | 4 +- .../workloads/RefFullyConnectedFloat32Workload.hpp | 4 +- .../workloads/RefFullyConnectedUint8Workload.hpp | 4 +- .../RefL2NormalizationFloat32Workload.hpp | 4 +- .../reference/workloads/RefLstmFloat32Workload.hpp | 4 +- .../workloads/RefMergerFloat32Workload.hpp | 4 +- .../reference/workloads/RefMergerUint8Workload.hpp | 4 +- .../workloads/RefNormalizationFloat32Workload.hpp | 4 +- .../reference/workloads/RefPermuteWorkload.hpp | 2 +- .../workloads/RefPooling2dFloat32Workload.hpp | 4 +- .../workloads/RefPooling2dUint8Workload.hpp | 4 +- .../workloads/RefReshapeFloat32Workload.hpp | 4 +- .../workloads/RefReshapeUint8Workload.hpp | 4 +- .../workloads/RefResizeBilinearFloat32Workload.hpp | 4 +- .../workloads/RefResizeBilinearUint8Workload.hpp | 4 +- .../workloads/RefSoftmaxFloat32Workload.hpp | 4 +- .../workloads/RefSoftmaxUint8Workload.hpp | 4 +- .../workloads/RefSplitterFloat32Workload.hpp | 4 +- .../workloads/RefSplitterUint8Workload.hpp | 4 +- .../reference/workloads/RefWorkloadUtils.hpp | 2 +- src/backends/reference/workloads/Splitter.hpp | 3 +- src/backends/test/ActivationTestImpl.hpp | 10 +- src/backends/test/ArmComputeCl.cpp | 6 +- src/backends/test/BatchNormTestImpl.hpp | 8 +- src/backends/test/ClContextControlFixture.hpp | 2 +- src/backends/test/Conv2dTestImpl.hpp | 6 +- src/backends/test/CreateWorkloadCl.cpp | 10 +- src/backends/test/IsLayerSupportedTest.cpp | 10 +- src/backends/test/LayerReleaseConstantDataTest.cpp | 8 +- src/backends/test/LayerTests.cpp | 2 +- src/backends/test/LstmTestImpl.hpp | 8 +- src/backends/test/MemCopyTests.cpp | 4 +- src/backends/test/NormTestImpl.hpp | 8 +- src/backends/test/PermuteTestImpl.hpp | 6 +- src/backends/test/Pooling2dTestImpl.hpp | 6 +- src/backends/test/ReshapeTestImpl.hpp | 6 +- src/backends/test/SoftmaxTestImpl.hpp | 6 +- src/backends/test/SplitterTestImpl.hpp | 8 +- src/backends/test/TensorCopyUtils.cpp | 4 +- src/backends/test/TensorCopyUtils.hpp | 4 +- 264 files changed, 5642 insertions(+), 5590 deletions(-) delete mode 100644 src/backends/ClContextControl.cpp delete mode 100644 src/backends/ClContextControl.hpp delete mode 100644 
src/backends/ClLayerSupport.cpp delete mode 100644 src/backends/ClLayerSupport.hpp delete mode 100644 src/backends/ClTensorHandle.hpp delete mode 100644 src/backends/ClWorkloadFactory.cpp delete mode 100644 src/backends/ClWorkloadFactory.hpp delete mode 100644 src/backends/ClWorkloads/CMakeLists.txt delete mode 100644 src/backends/ClWorkloads/ClActivationFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClActivationFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClActivationUint8Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClActivationUint8Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClAdditionWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClAdditionWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClBaseConstantWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClBaseConstantWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClBaseMergerWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClConstantFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClConstantFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClConstantUint8Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClConstantUint8Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClFloorFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClFloorFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClLstmFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClLstmFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClMergerFloatWorkload.cpp delete mode 100644 
src/backends/ClWorkloads/ClMergerFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClMergerUint8Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClMergerUint8Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClPadWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClPadWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClPermuteWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClPermuteWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClReshapeUint8Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClReshapeUint8Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClSplitterUint8Workload.cpp delete mode 100644 src/backends/ClWorkloads/ClSplitterUint8Workload.hpp delete mode 100644 src/backends/ClWorkloads/ClSubtractionWorkload.cpp delete mode 100644 src/backends/ClWorkloads/ClSubtractionWorkload.hpp delete mode 100644 src/backends/ClWorkloads/ClWorkloadUtils.hpp delete mode 100644 src/backends/ClWorkloads/backend.cmake delete mode 100644 src/backends/ClWorkloads/backend.mk create mode 100644 src/backends/cl/CMakeLists.txt create mode 100644 src/backends/cl/ClContextControl.cpp create mode 100644 src/backends/cl/ClContextControl.hpp create mode 100644 src/backends/cl/ClLayerSupport.cpp create mode 100644 src/backends/cl/ClLayerSupport.hpp create mode 100644 src/backends/cl/ClTensorHandle.hpp create mode 100644 src/backends/cl/ClWorkloadFactory.cpp create mode 100644 src/backends/cl/ClWorkloadFactory.hpp create mode 100644 src/backends/cl/backend.cmake create mode 100644 src/backends/cl/backend.mk create mode 100644 src/backends/cl/test/CMakeLists.txt create mode 100644 src/backends/cl/workloads/CMakeLists.txt create mode 100644 src/backends/cl/workloads/ClActivationFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClActivationFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClActivationUint8Workload.cpp create mode 100644 
src/backends/cl/workloads/ClActivationUint8Workload.hpp create mode 100644 src/backends/cl/workloads/ClAdditionWorkload.cpp create mode 100644 src/backends/cl/workloads/ClAdditionWorkload.hpp create mode 100644 src/backends/cl/workloads/ClBaseConstantWorkload.cpp create mode 100644 src/backends/cl/workloads/ClBaseConstantWorkload.hpp create mode 100644 src/backends/cl/workloads/ClBaseMergerWorkload.hpp create mode 100644 src/backends/cl/workloads/ClBaseSplitterWorkload.hpp create mode 100644 src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClConstantFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClConstantFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClConstantUint8Workload.cpp create mode 100644 src/backends/cl/workloads/ClConstantUint8Workload.hpp create mode 100644 src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp create mode 100644 src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp create mode 100644 src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp create mode 100644 src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp create mode 100644 src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp create mode 100644 src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp create mode 100644 src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp create mode 100644 src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp create mode 100644 src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp create mode 100644 src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp create mode 100644 src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp create mode 100644 src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp create mode 100644 src/backends/cl/workloads/ClDivisionFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClDivisionFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClFloorFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClFloorFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClFullyConnectedWorkload.cpp create mode 100644 src/backends/cl/workloads/ClFullyConnectedWorkload.hpp create mode 100644 src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClLstmFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClLstmFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClMergerFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClMergerFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClMergerUint8Workload.cpp create mode 100644 src/backends/cl/workloads/ClMergerUint8Workload.hpp create mode 100644 src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp create 
mode 100644 src/backends/cl/workloads/ClPadWorkload.cpp create mode 100644 src/backends/cl/workloads/ClPadWorkload.hpp create mode 100644 src/backends/cl/workloads/ClPermuteWorkload.cpp create mode 100644 src/backends/cl/workloads/ClPermuteWorkload.hpp create mode 100644 src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp create mode 100644 src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp create mode 100644 src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClPooling2dUint8Workload.cpp create mode 100644 src/backends/cl/workloads/ClPooling2dUint8Workload.hpp create mode 100644 src/backends/cl/workloads/ClReshapeFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClReshapeFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClReshapeUint8Workload.cpp create mode 100644 src/backends/cl/workloads/ClReshapeUint8Workload.hpp create mode 100644 src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp create mode 100644 src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp create mode 100644 src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp create mode 100644 src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp create mode 100644 src/backends/cl/workloads/ClSplitterFloatWorkload.cpp create mode 100644 src/backends/cl/workloads/ClSplitterFloatWorkload.hpp create mode 100644 src/backends/cl/workloads/ClSplitterUint8Workload.cpp create mode 100644 src/backends/cl/workloads/ClSplitterUint8Workload.hpp create mode 100644 src/backends/cl/workloads/ClSubtractionWorkload.cpp create mode 100644 src/backends/cl/workloads/ClSubtractionWorkload.hpp create mode 100644 src/backends/cl/workloads/ClWorkloadUtils.hpp create mode 100644 src/backends/cl/workloads/ClWorkloads.hpp diff --git a/Android.mk b/Android.mk index 56bc1340e3..25f3349cb9 100644 --- a/Android.mk +++ b/Android.mk @@ -64,15 +64,12 @@ LOCAL_SRC_FILES := \ src/armnnUtils/Logging.cpp \ src/armnnUtils/Permute.cpp \ src/backends/aclCommon/ArmComputeTensorUtils.cpp \ - src/backends/ClWorkloadFactory.cpp \ - src/backends/ClContextControl.cpp \ src/backends/CpuTensorHandle.cpp \ src/backends/MemCopyWorkload.cpp \ src/backends/WorkloadData.cpp \ src/backends/WorkloadFactory.cpp \ src/backends/OutputHandler.cpp \ src/backends/StringMapping.cpp \ - src/backends/ClLayerSupport.cpp \ src/armnn/layers/ActivationLayer.cpp \ src/armnn/layers/AdditionLayer.cpp \ src/armnn/layers/ArithmeticBaseLayer.cpp \ diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp index b0a3ac543b..d9229203ea 100644 --- a/src/armnn/Layer.cpp +++ b/src/armnn/Layer.cpp @@ -5,12 +5,12 @@ #include "Layer.hpp" #include "Graph.hpp" -#include "backends/WorkloadData.hpp" +#include +#include #include #include #include -#include "backends/CpuTensorHandle.hpp" #include diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp index fd523cee9e..d897b255a6 100644 --- a/src/armnn/Layer.hpp +++ b/src/armnn/Layer.hpp @@ -6,9 +6,9 @@ #include "LayerFwd.hpp" -#include "backends/OutputHandler.hpp" -#include "backends/WorkloadDataCollector.hpp" -#include "backends/WorkloadInfo.hpp" +#include +#include +#include #include "InternalTypes.hpp" #include 
"SerializeLayerParameters.hpp" diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp index 74243df7ba..538f8e8cda 100644 --- a/src/armnn/LayerSupport.cpp +++ b/src/armnn/LayerSupport.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp index 51551b395b..7808cc19e3 100644 --- a/src/armnn/LoadedNetwork.hpp +++ b/src/armnn/LoadedNetwork.hpp @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 49e60e1ad4..76c33ba2e6 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -6,11 +6,13 @@ #include "Graph.hpp" #include "Layer.hpp" #include "DeviceSpec.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" #include "Optimizer.hpp" -#include "armnn/Exceptions.hpp" +#include "optimizations/All.hpp" + +#include +#include +#include #include #include @@ -27,8 +29,6 @@ #include #include -#include "optimizations/All.hpp" - namespace armnn { diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp index b0a393d3f8..12211f4e04 100644 --- a/src/armnn/Runtime.hpp +++ b/src/armnn/Runtime.hpp @@ -6,10 +6,10 @@ #include "LoadedNetwork.hpp" #include "DeviceSpec.hpp" -#include "armnn/INetwork.hpp" -#include "armnn/IRuntime.hpp" -#include "armnn/Tensor.hpp" -#include "backends/ClContextControl.hpp" +#include +#include +#include +#include #include #include diff --git a/src/armnn/layers/MeanLayer.cpp b/src/armnn/layers/MeanLayer.cpp index 01f1133c5c..3259d95afd 100644 --- a/src/armnn/layers/MeanLayer.cpp +++ b/src/armnn/layers/MeanLayer.cpp @@ -6,9 +6,9 @@ #include "MeanLayer.hpp" #include "LayerCloneBase.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadData.hpp" -#include "backends/WorkloadFactory.hpp" +#include +#include +#include #include @@ -44,7 +44,7 @@ void MeanLayer::ValidateTensorShapesFromInputs() VerifyLayerConnections(1, CHECK_LOCATION()); const TensorInfo& input = GetInputSlot(0).GetConnection()->GetTensorInfo(); - + BOOST_ASSERT_MSG(input.GetNumDimensions() > 0 && input.GetNumDimensions() <= MaxNumOfTensorDimensions, "MeanLayer: Mean supports up to 4D input."); diff --git a/src/armnn/layers/PadLayer.cpp b/src/armnn/layers/PadLayer.cpp index 6a1e9150b8..7511e80dae 100644 --- a/src/armnn/layers/PadLayer.cpp +++ b/src/armnn/layers/PadLayer.cpp @@ -6,9 +6,9 @@ #include "PadLayer.hpp" #include "LayerCloneBase.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadData.hpp" -#include "backends/WorkloadFactory.hpp" +#include +#include +#include #include diff --git a/src/armnn/memory/BaseMemoryManager.hpp b/src/armnn/memory/BaseMemoryManager.hpp index 6710a785c7..c82eca6c1d 100644 --- a/src/armnn/memory/BaseMemoryManager.hpp +++ b/src/armnn/memory/BaseMemoryManager.hpp @@ -4,20 +4,20 @@ // #pragma once -#include "backends/WorkloadFactory.hpp" +#include #ifdef ARMCOMPUTENEON_ENABLED -#include "arm_compute/runtime/MemoryGroup.h" +#include #endif #ifdef ARMCOMPUTECL_ENABLED -#include "arm_compute/runtime/CL/CLMemoryGroup.h" +#include #endif #if defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED) -#include "arm_compute/runtime/IAllocator.h" -#include "arm_compute/runtime/IMemoryGroup.h" -#include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include +#include +#include #endif namespace armnn diff --git a/src/armnn/optimizations/ConvertConstants.hpp b/src/armnn/optimizations/ConvertConstants.hpp index 7168effe0c..9306a53bf4 100644 
--- a/src/armnn/optimizations/ConvertConstants.hpp +++ b/src/armnn/optimizations/ConvertConstants.hpp @@ -6,9 +6,9 @@ #pragma once #include "Optimization.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "Half.hpp" -#include "FloatingPointConverter.hpp" +#include +#include +#include namespace armnn { diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp index 61f9f1cd83..c111fe6016 100644 --- a/src/armnn/test/CreateWorkload.hpp +++ b/src/armnn/test/CreateWorkload.hpp @@ -8,12 +8,13 @@ #include -#include "backends/WorkloadData.hpp" -#include "Graph.hpp" +#include +#include + +#include #include -#include "backends/CpuTensorHandle.hpp" using namespace armnn; diff --git a/src/armnn/test/CreateWorkloadClNeon.hpp b/src/armnn/test/CreateWorkloadClNeon.hpp index 6d02c95740..a18e85d5f0 100644 --- a/src/armnn/test/CreateWorkloadClNeon.hpp +++ b/src/armnn/test/CreateWorkloadClNeon.hpp @@ -8,7 +8,7 @@ #include #if ARMCOMPUTECL_ENABLED -#include +#include #endif #if ARMCOMPUTENEON_ENABLED diff --git a/src/armnn/test/EndToEndTest.cpp b/src/armnn/test/EndToEndTest.cpp index 1d0886ed09..98b18411d4 100644 --- a/src/armnn/test/EndToEndTest.cpp +++ b/src/armnn/test/EndToEndTest.cpp @@ -4,11 +4,11 @@ // #include -#include "armnn/Descriptors.hpp" -#include "armnn/IRuntime.hpp" -#include "armnn/INetwork.hpp" +#include +#include +#include -#include "backends/test/QuantizeHelper.hpp" +#include #include #include diff --git a/src/armnn/test/FP16SupportTest.cpp b/src/armnn/test/FP16SupportTest.cpp index 74ca372b0a..6baadc4c77 100644 --- a/src/armnn/test/FP16SupportTest.cpp +++ b/src/armnn/test/FP16SupportTest.cpp @@ -3,14 +3,14 @@ // SPDX-License-Identifier: MIT // -#include "armnn/ArmNN.hpp" -#include "armnn/Descriptors.hpp" -#include "Graph.hpp" -#include "armnn/IRuntime.hpp" -#include "armnn/INetwork.hpp" -#include "Optimizer.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/test/QuantizeHelper.hpp" +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp index e38252917d..b297a74785 100644 --- a/src/armnn/test/GraphTests.cpp +++ b/src/armnn/test/GraphTests.cpp @@ -2,18 +2,19 @@ // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // +#include "GraphUtils.hpp" + #include +#include -#include "armnn/ArmNN.hpp" -#include "Graph.hpp" -#include "Layer.hpp" -#include "armnn/TypesUtils.hpp" -#include "armnn/Exceptions.hpp" +#include +#include +#include +#include +#include -#include "GraphUtils.hpp" -#include "backends/CpuTensorHandle.hpp" +#include -#include /// Checks that first comes before second in the order. 
bool CheckOrder(const armnn::Graph& graph, const armnn::Layer* first, const armnn::Layer* second) diff --git a/src/armnn/test/JsonPrinterTests.cpp b/src/armnn/test/JsonPrinterTests.cpp index aae7dba0f5..9c24fdaa7b 100644 --- a/src/armnn/test/JsonPrinterTests.cpp +++ b/src/armnn/test/JsonPrinterTests.cpp @@ -10,12 +10,12 @@ #include #include -#include "Profiling.hpp" -#include "armnn/Descriptors.hpp" -#include "armnn/IRuntime.hpp" -#include "armnn/INetwork.hpp" -#include "backends/test/ClContextControlFixture.hpp" -#include "backends/ClWorkloadFactory.hpp" +#include +#include +#include +#include +#include +#include BOOST_FIXTURE_TEST_SUITE(JsonPrinterTests, ClProfilingContextControlFixture) diff --git a/src/armnn/test/NetworkTests.cpp b/src/armnn/test/NetworkTests.cpp index 2f36f4da09..c342f22ced 100644 --- a/src/armnn/test/NetworkTests.cpp +++ b/src/armnn/test/NetworkTests.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "GraphUtils.hpp" diff --git a/src/armnn/test/OpenClTimerTest.cpp b/src/armnn/test/OpenClTimerTest.cpp index 37cb085604..76cffec4f3 100644 --- a/src/armnn/test/OpenClTimerTest.cpp +++ b/src/armnn/test/OpenClTimerTest.cpp @@ -5,18 +5,18 @@ #if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7 -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "backends/ClContextControl.hpp" -#include "backends/ClWorkloadFactory.hpp" -#include "backends/CpuTensorHandle.hpp" +#include +#include +#include +#include #include #include -#include "OpenClTimer.hpp" -#include "backends/test/TensorCopyUtils.hpp" +#include +#include #include "TensorHelpers.hpp" #include -#include "backends/WorkloadFactory.hpp" -#include "backends/test/WorkloadTestUtils.hpp" +#include +#include using namespace armnn; diff --git a/src/armnn/test/OptimizerTests.cpp b/src/armnn/test/OptimizerTests.cpp index f8dff366da..9d351babd8 100644 --- a/src/armnn/test/OptimizerTests.cpp +++ b/src/armnn/test/OptimizerTests.cpp @@ -4,11 +4,11 @@ // #include -#include "armnn/ArmNN.hpp" -#include "Graph.hpp" -#include "Optimizer.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "FloatingPointConverter.hpp" +#include +#include +#include +#include +#include namespace { diff --git a/src/armnn/test/TensorHelpers.hpp b/src/armnn/test/TensorHelpers.hpp index a4d3cf62c7..7f3ac9ec95 100644 --- a/src/armnn/test/TensorHelpers.hpp +++ b/src/armnn/test/TensorHelpers.hpp @@ -16,9 +16,9 @@ #include #include -#include "armnn/Tensor.hpp" +#include -#include "backends/test/QuantizeHelper.hpp" +#include #include diff --git a/src/backends/CMakeLists.txt b/src/backends/CMakeLists.txt index f806caac27..c9c5cc1a7e 100644 --- a/src/backends/CMakeLists.txt +++ b/src/backends/CMakeLists.txt @@ -4,13 +4,6 @@ # list(APPEND armnnBackendsCommon_sources - ClContextControl.cpp - ClContextControl.hpp - ClLayerSupport.cpp - ClLayerSupport.hpp - ClWorkloadFactory.cpp - ClWorkloadFactory.hpp - ClWorkloads.hpp CpuTensorHandle.cpp CpuTensorHandleFwd.hpp CpuTensorHandle.hpp @@ -33,13 +26,6 @@ list(APPEND armnnBackendsCommon_sources WorkloadUtils.hpp ) -if(ARMCOMPUTECL) - # Additionally include source files for ARM Compute OpenCL backend - list(APPEND armnnBackendsCommon_sources - ClTensorHandle.hpp - ) -endif() - add_library(armnnBackendsCommon STATIC ${armnnBackendsCommon_sources}) target_include_directories(armnnBackendsCommon PRIVATE ${PROJECT_SOURCE_DIR}/src) target_include_directories(armnnBackendsCommon PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) diff --git a/src/backends/ClContextControl.cpp 
b/src/backends/ClContextControl.cpp deleted file mode 100644 index e8b21c942d..0000000000 --- a/src/backends/ClContextControl.cpp +++ /dev/null @@ -1,235 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClContextControl.hpp" - -#include "armnn/Exceptions.hpp" - -#ifdef ARMCOMPUTECL_ENABLED -#include -#include -#endif - -#include -#include -#include -#include -#include - -#include "LeakChecking.hpp" - -namespace cl -{ -class Context; -class CommandQueue; -class Device; -} - -namespace armnn -{ - -ClContextControl::ClContextControl(IGpuAccTunedParameters* clTunedParameters, - bool profilingEnabled) - : m_clTunedParameters(boost::polymorphic_downcast(clTunedParameters)) - , m_ProfilingEnabled(profilingEnabled) -{ - // Ignore m_ProfilingEnabled if unused to avoid compiling problems when ArmCompute is disabled. - boost::ignore_unused(m_ProfilingEnabled); - -#ifdef ARMCOMPUTECL_ENABLED - try - { - std::vector platforms; - cl::Platform::get(&platforms); - - // Selects default platform for the first element. - cl::Platform::setDefault(platforms[0]); - - std::vector devices; - platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); - - // Selects default device for the first element. - cl::Device::setDefault(devices[0]); - } - catch (const cl::Error& clError) - { - throw ClRuntimeUnavailableException(boost::str(boost::format( - "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" - ) % clError.what() % clError.err())); - } - - // Removes the use of global CL context. - cl::Context::setDefault(cl::Context{}); - BOOST_ASSERT(cl::Context::getDefault()() == NULL); - - // Removes the use of global CL command queue. - cl::CommandQueue::setDefault(cl::CommandQueue{}); - BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL); - - // Always load the OpenCL runtime. - LoadOpenClRuntime(); -#endif -} - -ClContextControl::~ClContextControl() -{ -#ifdef ARMCOMPUTECL_ENABLED - // Load the OpencCL runtime without the tuned parameters to free the memory for them. - try - { - UnloadOpenClRuntime(); - } - catch (const cl::Error& clError) - { - // This should not happen, it is ignored if it does. - - // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an - // exception of type std::length_error. - // Using stderr instead in this context as there is no point in nesting try-catch blocks here. - std::cerr << "A CL error occurred unloading the runtime tuner parameters: " - << clError.what() << ". CL error code is: " << clError.err() << std::endl; - } -#endif -} - -void ClContextControl::LoadOpenClRuntime() -{ - DoLoadOpenClRuntime(true); -} - -void ClContextControl::UnloadOpenClRuntime() -{ - DoLoadOpenClRuntime(false); -} - -void ClContextControl::DoLoadOpenClRuntime(bool useTunedParameters) -{ -#ifdef ARMCOMPUTECL_ENABLED - cl::Device device = cl::Device::getDefault(); - cl::Context context; - cl::CommandQueue commandQueue; - - if (arm_compute::CLScheduler::get().context()() != NULL) - { - // Wait for all queued CL requests to finish before reinitialising it. - arm_compute::CLScheduler::get().sync(); - } - - try - { - arm_compute::CLKernelLibrary::get().clear_programs_cache(); - // Initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are no - // context references); it is initialised again, with a proper context, later. 
- arm_compute::CLScheduler::get().init(context, commandQueue, device); - arm_compute::CLKernelLibrary::get().init(".", context, device); - - { - // - // Here we replace the context with a new one in which - // the memory leak checks show it as an extra allocation but - // because of the scope of the leak checks, it doesn't count - // the disposal of the original object. On the other hand it - // does count the creation of this context which it flags - // as a memory leak. By adding the following line we prevent - // this to happen. - // - ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE(); - context = cl::Context(device); - } - - // NOTE: In this specific case profiling has to be enabled on the command queue - // in order for the CLTuner to work. - bool profilingNeededForClTuner = useTunedParameters && m_clTunedParameters && - m_clTunedParameters->m_Mode == IGpuAccTunedParameters::Mode::UpdateTunedParameters; - - if (m_ProfilingEnabled || profilingNeededForClTuner) - { - // Create a new queue with profiling enabled. - commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); - } - else - { - // Use default queue. - commandQueue = cl::CommandQueue(context, device); - } - } - catch (const cl::Error& clError) - { - throw ClRuntimeUnavailableException(boost::str(boost::format( - "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" - ) % clError.what() % clError.err())); - } - - // Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute. - arm_compute::CLKernelLibrary::get().init(".", context, device); - - arm_compute::ICLTuner* tuner = nullptr; - if (useTunedParameters && m_clTunedParameters) - { - tuner = &m_clTunedParameters->m_Tuner; - } - arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner); -#endif -} - -void ClContextControl::ClearClCache() -{ - DoLoadOpenClRuntime(true); -} - -armnn::IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(armnn::IGpuAccTunedParameters::Mode mode) -{ - return new ClTunedParameters(mode); -} - -armnn::IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode mode) -{ - return IGpuAccTunedParametersPtr(CreateRaw(mode), &IGpuAccTunedParameters::Destroy); -} - -void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params) -{ - delete params; -} - -ClTunedParameters::ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode) - : m_Mode(mode) -#ifdef ARMCOMPUTECL_ENABLED - , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) -#endif -{ -} - -void ClTunedParameters::Load(const char* filename) -{ -#ifdef ARMCOMPUTECL_ENABLED - try - { - m_Tuner.load_from_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + - e.what()); - } -#endif -} - -void ClTunedParameters::Save(const char* filename) const -{ -#ifdef ARMCOMPUTECL_ENABLED - try - { - m_Tuner.save_to_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + - e.what()); - } -#endif -} - -} // namespace armnn diff --git a/src/backends/ClContextControl.hpp b/src/backends/ClContextControl.hpp deleted file mode 100644 index 5ac56423bd..0000000000 --- a/src/backends/ClContextControl.hpp +++ /dev/null @@ -1,62 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include "armnn/IRuntime.hpp" - -#ifdef ARMCOMPUTECL_ENABLED -#include -#endif - -namespace armnn -{ - -class IGpuAccTunedParameters; -class ClTunedParameters; - -// ARM Compute OpenCL context control. -class ClContextControl -{ -public: - - ClContextControl(IGpuAccTunedParameters* clTunedParameters = nullptr, - bool profilingEnabled = false); - - virtual ~ClContextControl(); - - void LoadOpenClRuntime(); - - // Users should call this (after freeing all of the cl::Context objects they use) - // to release the cached memory used by the compute library. - void UnloadOpenClRuntime(); - - // Clear the CL cache, without losing the tuned parameter settings. - void ClearClCache(); - -private: - - void DoLoadOpenClRuntime(bool useTunedParameters); - - ClTunedParameters* m_clTunedParameters; - - bool m_ProfilingEnabled; -}; - -class ClTunedParameters : public IGpuAccTunedParameters -{ -public: - ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode); - - virtual void Load(const char* filename); - virtual void Save(const char* filename) const; - - Mode m_Mode; - -#ifdef ARMCOMPUTECL_ENABLED - arm_compute::CLTuner m_Tuner; -#endif -}; - -} // namespace armnn diff --git a/src/backends/ClLayerSupport.cpp b/src/backends/ClLayerSupport.cpp deleted file mode 100644 index c003d55397..0000000000 --- a/src/backends/ClLayerSupport.cpp +++ /dev/null @@ -1,478 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "LayerSupportCommon.hpp" - -#include "ClLayerSupport.hpp" -#include "InternalTypes.hpp" -#include -#include -#include - -#include - -#ifdef ARMCOMPUTECL_ENABLED -#include "ClWorkloads/ClAdditionWorkload.hpp" -#include "ClWorkloads/ClActivationFloatWorkload.hpp" -#include "ClWorkloads/ClBatchNormalizationFloatWorkload.hpp" -#include "ClWorkloads/ClConvertFp16ToFp32Workload.hpp" -#include "ClWorkloads/ClConvertFp32ToFp16Workload.hpp" -#include "ClWorkloads/ClConvolution2dBaseWorkload.hpp" -#include "ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp" -#include "ClWorkloads/ClDivisionFloatWorkload.hpp" -#include "ClWorkloads/ClL2NormalizationFloatWorkload.hpp" -#include "ClWorkloads/ClMultiplicationFloatWorkload.hpp" -#include "ClWorkloads/ClFullyConnectedWorkload.hpp" -#include "ClWorkloads/ClPadWorkload.hpp" -#include "ClWorkloads/ClPooling2dBaseWorkload.hpp" -#include "ClWorkloads/ClPermuteWorkload.hpp" -#include "ClWorkloads/ClNormalizationFloatWorkload.hpp" -#include "ClWorkloads/ClSoftmaxBaseWorkload.hpp" -#include "ClWorkloads/ClSubtractionWorkload.hpp" -#include "ClWorkloads/ClLstmFloatWorkload.hpp" -#endif - -using namespace boost; - -namespace armnn -{ -namespace -{ -template -bool IsMatchingSize2d(const TensorInfo& weightInfo) -{ - // Width & Height must match. 
- return (weightInfo.GetShape()[3] == FilterSize) && (weightInfo.GetShape()[2] == FilterSize); -} - -template -bool IsMatchingStride(uint32_t actualStride) -{ - return ValidStride == actualStride; -} - -template -bool IsMatchingStride(uint32_t actualStride) -{ - return IsMatchingStride(actualStride) || IsMatchingStride(actualStride); -}; - -bool IsClBackendSupported(std::string* reasonIfUnsupported) -{ -#if ARMCOMPUTECL_ENABLED - return true; -#else - if (reasonIfUnsupported != nullptr) - { - *reasonIfUnsupported = "The armnn library has been built without CL support"; - } - return false; -#endif -} - -#if ARMCOMPUTECL_ENABLED -#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) (expr) -#else -#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) IsClBackendSupported(reasonIfUnsupported) -#endif - -#if ARMCOMPUTECL_ENABLED -template -inline bool IsWorkloadSupported(FuncType&& func, std::string* reasonIfUnsupported, Args&&... args) -{ - arm_compute::Status aclStatus = func(std::forward(args)...); - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - return supported; -} - -#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ - return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); -#else -#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ - return IsClBackendSupported(reasonIfUnsupported); -#endif - -} //namespace - -template -bool IsSupportedForDataTypeCl(std::string* reasonIfUnsupported, - DataType dataType, - FloatFunc floatFuncPtr, - Uint8Func uint8FuncPtr, - Params&&... params) -{ - return IsClBackendSupported(reasonIfUnsupported) && - IsSupportedForDataTypeGeneric(reasonIfUnsupported, - dataType, - floatFuncPtr, - floatFuncPtr, - uint8FuncPtr, - std::forward(params)...); -} - -bool IsActivationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClActivationWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor); -} - -bool IsAdditionSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return FORWARD_CL_LAYER_SUPPORT_FUNC(ClAdditionValidate(input0, - input1, - output, - reasonIfUnsupported)); -} - -bool IsBatchNormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchNormalizationValidate, - reasonIfUnsupported, - input, - output, - mean, - var, - beta, - gamma, - descriptor); -} - -bool IsConstantSupportedCl(const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeCl(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) -{ - bool isSupported = false; - - bool strideXIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideX); - bool strideXIsThree = IsMatchingStride<3>(desc.m_StrideX); - - bool strideYIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideY); - bool strideYIsThree = IsMatchingStride<3>(desc.m_StrideY); - - bool strideIsOneOrTwo = strideXIsOneOrTwo && strideYIsOneOrTwo; - 
bool strideIsOneOrTwoOrThree = ( strideXIsOneOrTwo || strideXIsThree ) && ( strideYIsOneOrTwo || strideYIsThree ); - - // 1x1 convolution with strides of 1,2,3. - isSupported |= IsMatchingSize2d<1>(weightInfo) && ( strideIsOneOrTwoOrThree ); - - // 3x3 convolution with strides of 1,2. - isSupported |= IsMatchingSize2d<3>(weightInfo) && ( strideIsOneOrTwo ); - - // 5x5 convolution with strides of 1,2 - isSupported |= IsMatchingSize2d<5>(weightInfo) && ( strideIsOneOrTwo ); - - //Fall back to normal convolution for the asymmetric padding case. - if (desc.m_PadLeft != desc.m_PadRight || - desc.m_PadTop != desc.m_PadBottom) - { - //Direct convolution does not support asymmetric padding yet. - isSupported = false; - } - - return isSupported; -} - -bool IsDirectConvolution2dParamsSupportedCl(std::string* reasonIfUnsupported, - const Convolution2dDescriptor& parameters, - const TensorInfo& weightInfo) -{ - return IsClDirectConvolution2dSupported(weightInfo, parameters); -} - -bool IsConvolution2dSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional& biases, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvolution2dWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - -bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional& biases, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClDepthwiseConvolutionWorkloadValidate, - reasonIfUnsupported, - input, - output, - descriptor, - weights, - biases); -} - -bool IsDivisionSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool IsSubtractionSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return FORWARD_CL_LAYER_SUPPORT_FUNC(ClSubtractionValidate(input0, - input1, - output, - reasonIfUnsupported)); -} - -bool IsFullyConnectedSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClFullyConnectedWorkloadValidate, - reasonIfUnsupported, - input, - output, - weights, - biases, - descriptor); -} - -bool IsInputSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeCl(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsL2NormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const L2NormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool IsMergerSupportedCl(const std::vector inputs, - const OriginsDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeCl(reasonIfUnsupported, - inputs[0]->GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsMultiplicationSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, 
- const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate, - reasonIfUnsupported, - input0, - input1, - output); -} - -bool IsNormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool IsOutputSupportedCl(const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeCl(reasonIfUnsupported, - output.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsPadSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const PadDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - return FORWARD_CL_LAYER_SUPPORT_FUNC(ClPadValidate(input, output, descriptor, reasonIfUnsupported)); -} - -bool IsPermuteSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(input); - ignore_unused(output); - FORWARD_WORKLOAD_VALIDATE_FUNC(ClPermuteWorkloadValidate, reasonIfUnsupported, descriptor); -} - -bool IsPooling2dSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); -} - -bool IsResizeBilinearSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - return IsSupportedForDataTypeCl(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsSoftmaxSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - FORWARD_WORKLOAD_VALIDATE_FUNC(ClSoftmaxWorkloadValidate, reasonIfUnsupported, input, output); -} - -bool IsSplitterSupportedCl(const TensorInfo& input, - const ViewsDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(descriptor); - return IsSupportedForDataTypeCl(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); -} - -bool IsFakeQuantizationSupportedCl(const TensorInfo& input, - const FakeQuantizationDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - ignore_unused(input); - ignore_unused(descriptor); - return false; -} - -bool IsReshapeSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported) -{ - ignore_unused(input); - return true; -} - -bool IsFloorSupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - ignore_unused(output); - return IsClBackendSupported(reasonIfUnsupported) && - IsSupportedForDataTypeGeneric(reasonIfUnsupported, - input.GetDataType(), - &FalseFuncF16<>, - &TrueFunc<>, - &FalseFuncU8<>); -} - -bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, - const TensorInfo& 
forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClLstmFloatWorkloadValidate, reasonIfUnsupported, - input, outputStateIn, cellStateIn, scratchBuffer, outputStateOut, cellStateOut, - output, descriptor, inputToForgetWeights, inputToCellWeights, - inputToOutputWeights, recurrentToForgetWeights, - recurrentToCellWeights, recurrentToOutputWeights, - forgetGateBias, cellBias, outputGateBias, - inputToInputWeights, recurrentToInputWeights, - cellToInputWeights, inputGateBias, projectionWeights, - projectionBias, cellToForgetWeights, cellToOutputWeights); -} - -bool IsConvertFp16ToFp32SupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp16ToFp32WorkloadValidate, - reasonIfUnsupported, - input, - output, - reasonIfUnsupported); -} - -bool IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp32ToFp16WorkloadValidate, - reasonIfUnsupported, - input, - output, - reasonIfUnsupported); -} - -bool IsMeanSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - return false; -} - -} diff --git a/src/backends/ClLayerSupport.hpp b/src/backends/ClLayerSupport.hpp deleted file mode 100644 index 700d71801d..0000000000 --- a/src/backends/ClLayerSupport.hpp +++ /dev/null @@ -1,164 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include
-#include
-#include
-#include
-
-#include
-
-namespace armnn
-{
-bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc);
-bool IsClDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported,
-                                                   const DepthwiseConvolution2dDescriptor& parameters,
-                                                   const TensorInfo& weights);
-
-bool IsActivationSupportedCl(const TensorInfo& input,
-                             const TensorInfo& output,
-                             const ActivationDescriptor& descriptor,
-                             std::string* reasonIfUnsupported = nullptr);
-
-bool IsAdditionSupportedCl(const TensorInfo& input0,
-                           const TensorInfo& input1,
-                           const TensorInfo& output,
-                           std::string* reasonIfUnsupported = nullptr);
-
-bool IsBatchNormalizationSupportedCl(const TensorInfo& input,
-                                     const TensorInfo& output,
-                                     const TensorInfo& mean,
-                                     const TensorInfo& var,
-                                     const TensorInfo& beta,
-                                     const TensorInfo& gamma,
-                                     const BatchNormalizationDescriptor& descriptor,
-                                     std::string* reasonIfUnsupported = nullptr);
-
-bool IsConstantSupportedCl(const TensorInfo& output,
-                           std::string* reasonIfUnsupported = nullptr);
-
-bool IsConvolution2dSupportedCl(const TensorInfo& input,
-                                const TensorInfo& output,
-                                const Convolution2dDescriptor& descriptor,
-                                const TensorInfo& weights,
-                                const boost::optional<TensorInfo>& biases,
-                                std::string* reasonIfUnsupported = nullptr);
-
-bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input,
-                                       const TensorInfo& output,
-                                       const DepthwiseConvolution2dDescriptor& descriptor,
-                                       const TensorInfo& weights,
-                                       const boost::optional<TensorInfo>& biases,
-                                       std::string* reasonIfUnsupported = nullptr);
-
-bool IsDivisionSupportedCl(const TensorInfo& input0,
-                           const TensorInfo& input1,
-                           const TensorInfo& output,
-                           std::string* reasonIfUnsupported = nullptr);
-
-bool IsSubtractionSupportedCl(const TensorInfo& input0,
-                              const TensorInfo& input1,
-                              const TensorInfo& output,
-                              std::string* reasonIfUnsupported = nullptr);
-
-bool IsFullyConnectedSupportedCl(const TensorInfo& input,
-                                 const TensorInfo& output,
-                                 const TensorInfo& weights,
-                                 const TensorInfo& biases,
-                                 const FullyConnectedDescriptor& descriptor,
-                                 std::string* reasonIfUnsupported = nullptr);
-
-bool IsInputSupportedCl(const TensorInfo& input,
-                        std::string* reasonIfUnsupported = nullptr);
-
-bool IsL2NormalizationSupportedCl(const TensorInfo& input,
-                                  const TensorInfo& output,
-                                  const L2NormalizationDescriptor& descriptor,
-                                  std::string* reasonIfUnsupported = nullptr);
-
-bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn,
-                       const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer,
-                       const TensorInfo& outputStateOut, const TensorInfo& cellStateOut,
-                       const TensorInfo& output, const LstmDescriptor& descriptor,
-                       const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights,
-                       const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights,
-                       const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights,
-                       const TensorInfo& forgetGateBias, const TensorInfo& cellBias,
-                       const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights,
-                       const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights,
-                       const TensorInfo* inputGateBias, const TensorInfo* projectionWeights,
-                       const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights,
-                       const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr);
-
-bool IsMergerSupportedCl(const std::vector<const TensorInfo*> inputs,
-                         const OriginsDescriptor&
descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsMultiplicationSupportedCl(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsNormalizationSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsOutputSupportedCl(const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsPadSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const PadDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsPermuteSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const PermuteDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsPooling2dSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsResizeBilinearSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsSoftmaxSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const SoftmaxDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsSplitterSupportedCl(const TensorInfo& input, - const ViewsDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsFakeQuantizationSupportedCl(const TensorInfo& input, - const FakeQuantizationDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsReshapeSupportedCl(const TensorInfo& input, - std::string* reasonIfUnsupported = nullptr); - -bool IsFloorSupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsMeanSupportedCl(const TensorInfo& input, - const TensorInfo& output, - const MeanDescriptor& descriptor, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvertFp16ToFp32SupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -bool IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported = nullptr); - -} diff --git a/src/backends/ClTensorHandle.hpp b/src/backends/ClTensorHandle.hpp deleted file mode 100644 index 556e4479b6..0000000000 --- a/src/backends/ClTensorHandle.hpp +++ /dev/null @@ -1,141 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -namespace armnn -{ - - -class IClTensorHandle : public ITensorHandle -{ -public: - virtual arm_compute::ICLTensor& GetTensor() = 0; - virtual arm_compute::ICLTensor const& GetTensor() const = 0; - virtual arm_compute::DataType GetDataType() const = 0; - virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) = 0; -}; - -class ClTensorHandle : public IClTensorHandle -{ -public: - ClTensorHandle(const TensorInfo& tensorInfo) - { - armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); - } - - ClTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout) - { - armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout); - } - - arm_compute::CLTensor& GetTensor() override { return m_Tensor; } - arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; } - virtual void Allocate() override {armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);} - - virtual void Manage() override - { - assert(m_MemoryGroup != nullptr); - m_MemoryGroup->manage(&m_Tensor); - } - - virtual const void* Map(bool blocking = true) const override - { - const_cast(&m_Tensor)->map(blocking); - return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); - } - virtual void Unmap() const override { const_cast(&m_Tensor)->unmap(); } - - virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } - - virtual ITensorHandle* GetParent() const override { return nullptr; } - - virtual arm_compute::DataType GetDataType() const override - { - return m_Tensor.info()->data_type(); - } - - virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) override - { - m_MemoryGroup = boost::polymorphic_pointer_downcast(memoryGroup); - } - - TensorShape GetStrides() const override - { - return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); - } - - TensorShape GetShape() const override - { - return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); - } -private: - arm_compute::CLTensor m_Tensor; - std::shared_ptr m_MemoryGroup; -}; - -class ClSubTensorHandle : public IClTensorHandle -{ -public: - ClSubTensorHandle(IClTensorHandle* parent, - const arm_compute::TensorShape& shape, - const arm_compute::Coordinates& coords) - : m_Tensor(&parent->GetTensor(), shape, coords) - { - parentHandle = parent; - } - - arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; } - arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; } - - virtual void Allocate() override {} - virtual void Manage() override {} - - virtual const void* Map(bool blocking = true) const override - { - const_cast(&m_Tensor)->map(blocking); - return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); - } - virtual void Unmap() const override { const_cast(&m_Tensor)->unmap(); } - - virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } - - virtual ITensorHandle* GetParent() const override { return parentHandle; } - - virtual arm_compute::DataType GetDataType() const override - { - return m_Tensor.info()->data_type(); - } - - virtual void SetMemoryGroup(const std::shared_ptr&) override {} - - TensorShape GetStrides() const override - { - return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); - } - - TensorShape GetShape() const 
override - { - return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); - } - -private: - mutable arm_compute::CLSubTensor m_Tensor; - ITensorHandle* parentHandle = nullptr; - -}; - -} diff --git a/src/backends/ClWorkloadFactory.cpp b/src/backends/ClWorkloadFactory.cpp deleted file mode 100644 index 6d7ff3d4e3..0000000000 --- a/src/backends/ClWorkloadFactory.cpp +++ /dev/null @@ -1,506 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#include "ClWorkloadFactory.hpp" - -#include -#include - -#include -#include -#include - -#ifdef ARMCOMPUTECL_ENABLED -#include -#include -#include - -#include "ClWorkloads.hpp" - -#include -#include - -#include -#endif - -#include "MakeWorkloadHelper.hpp" - -#include -#include -#include - -namespace armnn -{ - -bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, - boost::optional dataType, - std::string& outReasonIfUnsupported) -{ - return IWorkloadFactory::IsLayerSupported(Compute::GpuAcc, layer, dataType, outReasonIfUnsupported); -} - -#ifdef ARMCOMPUTECL_ENABLED - -ClWorkloadFactory::ClWorkloadFactory() -: m_MemoryManager(std::make_unique()) -{ -} - -std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const -{ - std::unique_ptr tensorHandle = std::make_unique(tensorInfo); - tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); - - return tensorHandle; -} - -std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const -{ - std::unique_ptr tensorHandle = std::make_unique(tensorInfo, dataLayout); - tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); - - return tensorHandle; -} - -std::unique_ptr ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const -{ - BOOST_ASSERT(parent.GetType() == ITensorHandle::CL); - - arm_compute::Coordinates coords; - arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); - - coords.set_num_dimensions(subTensorShape.GetNumDimensions()); - for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) - { - // Arm compute indexes tensor coords in reverse order. 
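-        // e.g. an armnn NCHW shape { N, C, H, W } is held by ACL as { W, H, C, N },
-        // so the origin for armnn dimension i is read from index (numDims - i - 1) below.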
- unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; - coords.set(i, boost::numeric_cast(subTensorOrigin[revertedIndex])); - } - - return std::make_unique( - boost::polymorphic_downcast(&parent), shape, coords); -} - -std::unique_ptr ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateFullyConnected( - const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info, - m_MemoryManager.GetIntraLayerManager()); -} - -std::unique_ptr ClWorkloadFactory::CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload, - ClAdditionWorkload>(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateMultiplication( - const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateDivision( - const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload, - ClSubtractionWorkload>(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateBatchNormalization( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr 
ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) - { - throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload"); - } - - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateResizeBilinear( - const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateFakeQuantization( - const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateConvertFp16ToFp32( - const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateConvertFp32ToFp16( - const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return std::make_unique(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload(descriptor, info); -} - -std::unique_ptr ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return MakeWorkload, - ClPadWorkload>(descriptor, info); -} - -void ClWorkloadFactory::Finalize() -{ - m_MemoryManager.Finalize(); -} - -void ClWorkloadFactory::Release() -{ - m_MemoryManager.Release(); -} - -void ClWorkloadFactory::Acquire() -{ - m_MemoryManager.Acquire(); -} - -#else // #if ARMCOMPUTECL_ENABLED - -ClWorkloadFactory::ClWorkloadFactory() -{ -} - -std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - 
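Every method in this stub branch follows the same shape: when ARMCOMPUTECL_ENABLED is not defined the factory still compiles, but each creator collapses to a null implementation. A minimal sketch of the pattern, using a hypothetical Foo layer (all Foo names are assumptions for illustration, not part of the patch):

#ifdef ARMCOMPUTECL_ENABLED
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFoo(const FooQueueDescriptor& descriptor,
                                                        const WorkloadInfo& info) const
{
    // CL build: dispatch to the float or uint8 OpenCL workload.
    return MakeWorkload<ClFooFloatWorkload, ClFooUint8Workload>(descriptor, info);
}
#else
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFoo(const FooQueueDescriptor& descriptor,
                                                        const WorkloadInfo& info) const
{
    // Non-CL build: the factory exists but can create nothing.
    return nullptr;
}
#endif

In practice the nullptr stubs should only be reachable if a GpuAcc backend is requested from a build compiled without CL support.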
-std::unique_ptr ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateBatchNormalization( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateConvertFp16ToFp32( - const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateConvertFp32ToFp16( - const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - 
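The MakeWorkload helper used throughout the CL-enabled branch selects the concrete workload class from the tensor data type recorded in the WorkloadInfo. A rough sketch of that dispatch, simplified from MakeWorkloadHelper.hpp (member names and the fallback behaviour are assumptions; the real helper also forwards extra arguments such as the memory manager):

template <typename FloatWorkloadT, typename Uint8WorkloadT, typename QueueDescriptorT>
std::unique_ptr<IWorkload> MakeWorkloadSketch(const QueueDescriptorT& descriptor, const WorkloadInfo& info)
{
    // Pick the data type from the first input, or the first output for
    // layers with no inputs (e.g. Constant).
    const DataType dataType = !info.m_InputTensorInfos.empty()
                                  ? info.m_InputTensorInfos[0].GetDataType()
                                  : info.m_OutputTensorInfos[0].GetDataType();
    switch (dataType)
    {
        case DataType::Float16:
        case DataType::Float32:
            // A single FloatWorkload class covers both F16 and F32.
            return std::make_unique<FloatWorkloadT>(descriptor, info);
        case DataType::QuantisedAsymm8:
            return std::make_unique<Uint8WorkloadT>(descriptor, info);
        default:
            return nullptr; // The real helper plugs in NullWorkload for unsupported combinations.
    }
}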
-std::unique_ptr ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -std::unique_ptr ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const -{ - return nullptr; -} - -void ClWorkloadFactory::Finalize() -{ -} - -void ClWorkloadFactory::Release() -{ -} - -void ClWorkloadFactory::Acquire() -{ -} - -#endif // #if ARMCOMPUTECL_ENABLED - -} // namespace armnn diff --git a/src/backends/ClWorkloadFactory.hpp b/src/backends/ClWorkloadFactory.hpp deleted file mode 100644 index 59ae3b343a..0000000000 --- a/src/backends/ClWorkloadFactory.hpp +++ /dev/null @@ -1,139 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include - -#include -#include - -#include "memory/BaseMemoryManager.hpp" - -namespace armnn -{ - -// ARM Compute OpenCL workload factory. -class ClWorkloadFactory : public IWorkloadFactory -{ -public: - ClWorkloadFactory(); - - virtual Compute GetCompute() const override { return Compute::GpuAcc; } - - static bool IsLayerSupported(const Layer& layer, boost::optional dataType, - std::string& outReasonIfUnsupported); - - virtual bool SupportsSubTensors() const override { return true; } - - virtual std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, - TensorShape const& subTensorShape, - unsigned int const* subTensorOrigin) const override; - - virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo) const override; - - virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo, - DataLayout dataLayout) const override; - - virtual std::unique_ptr CreateInput(const InputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateOutput(const OutputQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateActivation(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateSplitter(const SplitterQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateMerger(const MergerQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreatePermute(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateDepthwiseConvolution2d( - const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateNormalization(const NormalizationQueueDescriptor& descriptor, - const 
WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateAddition(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateMemCopy(const MemCopyQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateConstant(const ConstantQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateReshape(const ReshapeQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateFloor(const FloorQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateLstm(const LstmQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateSubtraction(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual std::unique_ptr CreateMean(const MeanQueueDescriptor& descriptor, - const WorkloadInfo& Info) const override; - - virtual std::unique_ptr CreatePad(const PadQueueDescriptor& descriptor, - const WorkloadInfo& info) const override; - - virtual void Finalize() override; - - virtual void Release() override; - - virtual void Acquire() override; - -private: - -#ifdef ARMCOMPUTECL_ENABLED - mutable ClMemoryManager m_MemoryManager; -#endif -}; - -} // namespace armnn diff --git a/src/backends/ClWorkloads/CMakeLists.txt b/src/backends/ClWorkloads/CMakeLists.txt deleted file mode 100644 index ec61d534f0..0000000000 --- a/src/backends/ClWorkloads/CMakeLists.txt +++ /dev/null @@ -1,91 +0,0 @@ -# -# Copyright © 2017 Arm Ltd. All rights reserved. 
-# SPDX-License-Identifier: MIT -# - -list(APPEND armnnClBackend_sources - ClActivationFloatWorkload.cpp - ClActivationFloatWorkload.hpp - ClActivationUint8Workload.cpp - ClActivationUint8Workload.hpp - ClAdditionWorkload.cpp - ClAdditionWorkload.hpp - ClBaseConstantWorkload.cpp - ClBaseConstantWorkload.hpp - ClBaseMergerWorkload.hpp - ClBaseSplitterWorkload.hpp - ClBatchNormalizationFloatWorkload.cpp - ClBatchNormalizationFloatWorkload.hpp - ClConstantFloatWorkload.cpp - ClConstantFloatWorkload.hpp - ClConstantUint8Workload.cpp - ClConstantUint8Workload.hpp - ClConvertFp16ToFp32Workload.cpp - ClConvertFp16ToFp32Workload.hpp - ClConvertFp32ToFp16Workload.cpp - ClConvertFp32ToFp16Workload.hpp - ClConvolution2dBaseWorkload.cpp - ClConvolution2dBaseWorkload.hpp - ClConvolution2dFloatWorkload.cpp - ClConvolution2dFloatWorkload.hpp - ClConvolution2dUint8Workload.cpp - ClConvolution2dUint8Workload.hpp - ClDepthwiseConvolutionBaseWorkload.cpp - ClDepthwiseConvolutionBaseWorkload.hpp - ClDepthwiseConvolutionFloatWorkload.cpp - ClDepthwiseConvolutionFloatWorkload.hpp - ClDepthwiseConvolutionUint8Workload.cpp - ClDepthwiseConvolutionUint8Workload.hpp - ClDivisionFloatWorkload.cpp - ClDivisionFloatWorkload.hpp - ClFloorFloatWorkload.cpp - ClFloorFloatWorkload.hpp - ClFullyConnectedWorkload.cpp - ClFullyConnectedWorkload.hpp - ClL2NormalizationFloatWorkload.cpp - ClL2NormalizationFloatWorkload.hpp - ClLstmFloatWorkload.cpp - ClLstmFloatWorkload.hpp - ClMergerFloatWorkload.cpp - ClMergerFloatWorkload.hpp - ClMergerUint8Workload.cpp - ClMergerUint8Workload.hpp - ClMultiplicationFloatWorkload.cpp - ClMultiplicationFloatWorkload.hpp - ClNormalizationFloatWorkload.cpp - ClNormalizationFloatWorkload.hpp - ClPadWorkload.cpp - ClPadWorkload.hpp - ClPermuteWorkload.cpp - ClPermuteWorkload.hpp - ClPooling2dBaseWorkload.cpp - ClPooling2dBaseWorkload.hpp - ClPooling2dFloatWorkload.cpp - ClPooling2dFloatWorkload.hpp - ClPooling2dUint8Workload.cpp - ClPooling2dUint8Workload.hpp - ClReshapeFloatWorkload.cpp - ClReshapeFloatWorkload.hpp - ClReshapeUint8Workload.cpp - ClReshapeUint8Workload.hpp - ClResizeBilinearFloatWorkload.cpp - ClResizeBilinearFloatWorkload.hpp - ClSoftmaxBaseWorkload.cpp - ClSoftmaxBaseWorkload.hpp - ClSoftmaxFloatWorkload.cpp - ClSoftmaxFloatWorkload.hpp - ClSoftmaxUint8Workload.cpp - ClSoftmaxUint8Workload.hpp - ClSplitterFloatWorkload.cpp - ClSplitterFloatWorkload.hpp - ClSplitterUint8Workload.cpp - ClSplitterUint8Workload.hpp - ClSubtractionWorkload.cpp - ClSubtractionWorkload.hpp - ClWorkloadUtils.hpp -) - -add_library(armnnClBackend STATIC ${armnnClBackend_sources}) -target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) -target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/ClWorkloads/ClActivationFloatWorkload.cpp b/src/backends/ClWorkloads/ClActivationFloatWorkload.cpp deleted file mode 100644 index 10eef66ef2..0000000000 --- a/src/backends/ClWorkloads/ClActivationFloatWorkload.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClActivationFloatWorkload.hpp" -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - const arm_compute::ActivationLayerInfo activationLayerInfo = - ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); - - if (input.GetDataType() == DataType::QuantisedAsymm8 && - activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) - { - return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, - "CL: Logistic Activations unsupported with QAsymm8 data type."}; - } - - return arm_compute::CLActivationLayer::validate(&aclInput, - &aclOutput, - activationLayerInfo); -} - -ClActivationFloatWorkload::ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClActivationFloatWorkload", 1, 1); - - const arm_compute::ActivationLayerInfo activationLayerInfo = - ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - m_ActivationLayer.configure(&input, &output, activationLayerInfo); -} - -void ClActivationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationFloatWorkload_Execute"); - m_ActivationLayer.run(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClActivationFloatWorkload.hpp b/src/backends/ClWorkloads/ClActivationFloatWorkload.hpp deleted file mode 100644 index e1b6fe13d8..0000000000 --- a/src/backends/ClWorkloads/ClActivationFloatWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ -arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const ActivationDescriptor& descriptor); - -// Activation layer execution. -class ClActivationFloatWorkload : public FloatWorkload -{ -public: - ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLActivationLayer m_ActivationLayer; -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClActivationUint8Workload.cpp b/src/backends/ClWorkloads/ClActivationUint8Workload.cpp deleted file mode 100644 index f41a97ae24..0000000000 --- a/src/backends/ClWorkloads/ClActivationUint8Workload.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT
-//
-
-#include "ClActivationUint8Workload.hpp"
-#include
-
-#include
-#include
-#include
-
-#include "ClWorkloadUtils.hpp"
-
-namespace armnn
-{
-
-ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info)
-    : Uint8Workload<ActivationQueueDescriptor>(descriptor, info)
-{
-    auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function);
-    arm_compute::ActivationLayerInfo layerInfo(activation,
-                                               m_Data.m_Parameters.m_A,
-                                               m_Data.m_Parameters.m_B);
-
-    m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1);
-
-    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
-
-    m_ActivationLayer.configure(&input, &output, layerInfo);
-}
-
-void ClActivationUint8Workload::Execute() const
-{
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationUint8Workload_Execute");
-
-    m_ActivationLayer.run();
-}
-
-} //namespace armnn
-
-
diff --git a/src/backends/ClWorkloads/ClActivationUint8Workload.hpp b/src/backends/ClWorkloads/ClActivationUint8Workload.hpp
deleted file mode 100644
index bb2ff58853..0000000000
--- a/src/backends/ClWorkloads/ClActivationUint8Workload.hpp
+++ /dev/null
@@ -1,29 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "backends/Workload.hpp"
-
-#include
-
-namespace armnn
-{
-
-// Activation layer execution.
-class ClActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor>
-{
-public:
-    ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
-    void Execute() const override;
-
-private:
-    mutable arm_compute::CLActivationLayer m_ActivationLayer;
-};
-
-} //namespace armnn
-
-
-
diff --git a/src/backends/ClWorkloads/ClAdditionWorkload.cpp b/src/backends/ClWorkloads/ClAdditionWorkload.cpp
deleted file mode 100644
index f43d3c126a..0000000000
--- a/src/backends/ClWorkloads/ClAdditionWorkload.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT -// - -#include "ClAdditionWorkload.hpp" - -#include -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -template -ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast(this->m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); -} - -template -void ClAdditionWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute"); - m_Layer.run(); -} - -bool ClAdditionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info, - &aclInput1Info, - &aclOutputInfo, - g_AclConvertPolicy); - - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - - return supported; -} - -} //namespace armnn - -template class armnn::ClAdditionWorkload; -template class armnn::ClAdditionWorkload; diff --git a/src/backends/ClWorkloads/ClAdditionWorkload.hpp b/src/backends/ClWorkloads/ClAdditionWorkload.hpp deleted file mode 100644 index b4706890d1..0000000000 --- a/src/backends/ClWorkloads/ClAdditionWorkload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -template -class ClAdditionWorkload : public TypedWorkload -{ -public: - ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLArithmeticAddition m_Layer; -}; - -bool ClAdditionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported); -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseConstantWorkload.cpp b/src/backends/ClWorkloads/ClBaseConstantWorkload.cpp deleted file mode 100644 index 56af3b3baa..0000000000 --- a/src/backends/ClWorkloads/ClBaseConstantWorkload.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClBaseConstantWorkload.hpp" -#include -#include -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -template class ClBaseConstantWorkload; -template class ClBaseConstantWorkload; - -template -void ClBaseConstantWorkload::Execute() const -{ - // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data - // on the first inference, then reused for subsequent inferences. - // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not - // have been configured at the time. - if (!m_RanOnce) - { - const ConstantQueueDescriptor& data = this->m_Data; - - BOOST_ASSERT(data.m_LayerOutput != nullptr); - arm_compute::CLTensor& output = static_cast(data.m_Outputs[0])->GetTensor(); - arm_compute::DataType computeDataType = static_cast(data.m_Outputs[0])->GetDataType(); - - switch (computeDataType) - { - case arm_compute::DataType::F16: - { - CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); - break; - } - case arm_compute::DataType::F32: - { - CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); - break; - } - case arm_compute::DataType::QASYMM8: - { - CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor()); - break; - } - default: - { - BOOST_ASSERT_MSG(false, "Unknown data type"); - break; - } - } - - m_RanOnce = true; - } -} - - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseConstantWorkload.hpp b/src/backends/ClWorkloads/ClBaseConstantWorkload.hpp deleted file mode 100644 index ca1db389dc..0000000000 --- a/src/backends/ClWorkloads/ClBaseConstantWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ -template -class ClBaseConstantWorkload : public TypedWorkload -{ -public: - ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) - : TypedWorkload(descriptor, info) - , m_RanOnce(false) - { - } - - void Execute() const override; - -private: - mutable bool m_RanOnce; -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseMergerWorkload.hpp b/src/backends/ClWorkloads/ClBaseMergerWorkload.hpp deleted file mode 100644 index 420e074217..0000000000 --- a/src/backends/ClWorkloads/ClBaseMergerWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -// Base class template providing an implementation of the Merger layer common to all data types. -template -class ClBaseMergerWorkload : public TypedWorkload -{ -public: - using TypedWorkload::TypedWorkload; - - void Execute() const override - { - // With subtensors, merger is a no-op. - } -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp b/src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp deleted file mode 100644 index 41f382cac8..0000000000 --- a/src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -// Base class template providing an implementation of the Splitter layer common to all data types. -template -class ClBaseSplitterWorkload : public TypedWorkload -{ -public: - using TypedWorkload::TypedWorkload; - - void Execute() const override - { - // With subtensors, merger is a no-op. - } -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp deleted file mode 100644 index 4d5c20f9bd..0000000000 --- a/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClBatchNormalizationFloatWorkload.hpp" -#include -#include -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor &desc) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); - const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); - const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); - const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); - - return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo, - &aclOutputInfo, - &aclMeanInfo, - &aclVarInfo, - &aclBetaInfo, - &aclGammaInfo, - desc.m_Eps); -} - -ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( - const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) - : FloatWorkload(descriptor, info) -{ - m_Mean = std::make_unique(); - BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); - - m_Variance = std::make_unique(); - BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); - - m_Gamma = std::make_unique(); - BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); - - m_Beta = std::make_unique(); - BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); - - m_Data.ValidateInputsOutputs("ClBatchNormalizationFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, - &output, - m_Mean.get(), - m_Variance.get(), - m_Beta.get(), - m_Gamma.get(), - m_Data.m_Parameters.m_Eps); - - InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean); - InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance); - InitializeArmComputeClTensorData(*m_Beta, m_Data.m_Beta); - InitializeArmComputeClTensorData(*m_Gamma, m_Data.m_Gamma); - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_Layer.prepare(); - FreeUnusedTensors(); -} - -void ClBatchNormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute"); - m_Layer.run(); -} - -void 
ClBatchNormalizationFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_Mean); - FreeTensorIfUnused(m_Variance); - FreeTensorIfUnused(m_Gamma); - FreeTensorIfUnused(m_Beta); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp deleted file mode 100644 index 22c71b1073..0000000000 --- a/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp +++ /dev/null @@ -1,46 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& mean, - const TensorInfo& var, - const TensorInfo& beta, - const TensorInfo& gamma, - const BatchNormalizationDescriptor& desc); - -class ClBatchNormalizationFloatWorkload : public FloatWorkload -{ -public: - ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - - using FloatWorkload::FloatWorkload; - void Execute() const override; - -private: - mutable arm_compute::CLBatchNormalizationLayer m_Layer; - - std::unique_ptr m_Mean; - std::unique_ptr m_Variance; - std::unique_ptr m_Gamma; - std::unique_ptr m_Beta; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - - - - diff --git a/src/backends/ClWorkloads/ClConstantFloatWorkload.cpp b/src/backends/ClWorkloads/ClConstantFloatWorkload.cpp deleted file mode 100644 index 1565047c22..0000000000 --- a/src/backends/ClWorkloads/ClConstantFloatWorkload.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConstantFloatWorkload.hpp" -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClConstantFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantFloatWorkload_Execute"); - ClBaseConstantWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConstantFloatWorkload.hpp b/src/backends/ClWorkloads/ClConstantFloatWorkload.hpp deleted file mode 100644 index 0cbeaad9ea..0000000000 --- a/src/backends/ClWorkloads/ClConstantFloatWorkload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseConstantWorkload.hpp" - -namespace armnn -{ -class ClConstantFloatWorkload : public ClBaseConstantWorkload -{ -public: - using ClBaseConstantWorkload::ClBaseConstantWorkload; - void Execute() const override; -}; - - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConstantUint8Workload.cpp b/src/backends/ClWorkloads/ClConstantUint8Workload.cpp deleted file mode 100644 index a5ef0321cd..0000000000 --- a/src/backends/ClWorkloads/ClConstantUint8Workload.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClConstantUint8Workload.hpp" -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClConstantUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantUint8Workload_Execute"); - ClBaseConstantWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConstantUint8Workload.hpp b/src/backends/ClWorkloads/ClConstantUint8Workload.hpp deleted file mode 100644 index 30556dc0d6..0000000000 --- a/src/backends/ClWorkloads/ClConstantUint8Workload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseConstantWorkload.hpp" - -namespace armnn -{ - -class ClConstantUint8Workload : public ClBaseConstantWorkload -{ -public: - using ClBaseConstantWorkload::ClBaseConstantWorkload; - void Execute() const override; -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp deleted file mode 100644 index 534249aeac..0000000000 --- a/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConvertFp16ToFp32Workload.hpp" -#include "backends/ClTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload( - const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) : - Float16ToFloat32Workload(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); -} - -void ClConvertFp16ToFp32Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute"); - m_Layer.run(); -} - -arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - if (input.GetDataType() != DataType::Float16) - { - *reasonIfUnsupported = "Input should be Float16"; - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); - } - if (output.GetDataType() != DataType::Float32) - { - *reasonIfUnsupported = "Output should be Float32"; - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); - } - - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( - &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); - - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - - return aclStatus; -} - - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp deleted file mode 100644 index c72d2262b3..0000000000 --- 
a/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload -{ -public: - - ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLDepthConvertLayer m_Layer; -}; - -arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported); - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp deleted file mode 100644 index 73b3cbc542..0000000000 --- a/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConvertFp32ToFp16Workload.hpp" -#include "backends/ClTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload( - const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) : - Float32ToFloat16Workload(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); -} - -void ClConvertFp32ToFp16Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute"); - m_Layer.run(); -} - -arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - if (input.GetDataType() != DataType::Float32) - { - *reasonIfUnsupported = "Input should be Float32"; - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); - } - if (output.GetDataType() != DataType::Float16) - { - *reasonIfUnsupported = "Output should be Float16"; - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); - } - - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( - &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); - - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - - return aclStatus; -} - - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp deleted file mode 100644 index fb6af02070..0000000000 --- a/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload -{ -public: - - ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); - virtual void Execute() const override; - -private: - mutable arm_compute::CLDepthConvertLayer m_Layer; -}; - -arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - std::string* reasonIfUnsupported); - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp deleted file mode 100644 index 1f5094e143..0000000000 --- a/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClConvolution2dBaseWorkload.hpp" -#include -#include -#include -#include - -#include - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional& biases) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.is_initialized()); - - aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); - - return arm_compute::CLConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - layerInfo); -} - -} diff --git a/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp deleted file mode 100644 index a983dba79a..0000000000 --- a/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include -#include - -#include - -#include - -namespace armnn -{ - -arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Convolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional& biases); - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp deleted file mode 100644 index 72565966b8..0000000000 --- a/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
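// Editor's sketch (not part of the moved files): ClConvolution2dWorkloadValidate
// above only consumes bias information when the descriptor enables it. A
// hypothetical caller building the boost::optional argument:
#include <boost/optional.hpp>

arm_compute::Status ValidateConv2d(const armnn::TensorInfo& input,
                                   const armnn::TensorInfo& output,
                                   const armnn::Convolution2dDescriptor& desc,
                                   const armnn::TensorInfo& weights,
                                   const armnn::TensorInfo* bias) // may be null when bias is disabled
{
    boost::optional<armnn::TensorInfo> biases = boost::none;
    if (desc.m_BiasEnabled)
    {
        biases = *bias; // the validate function asserts biases.is_initialized() in this case
    }
    return armnn::ClConvolution2dWorkloadValidate(input, output, desc, weights, biases);
}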
-// SPDX-License-Identifier: MIT -// - -#include "ClConvolution2dFloatWorkload.hpp" -#include -#include -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClConvolution2dFloatWorkload::ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr& memoryManager) - : FloatWorkload(descriptor, info) - , m_ConvolutionLayer(memoryManager) -{ - - // todo: check tensor shapes match. - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); - } - - m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - m_ConvolutionLayer.configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - - InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_ConvolutionLayer.prepare(); - FreeUnusedTensors(); -} - -void ClConvolution2dFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dFloat32Workload_Execute"); - - m_ConvolutionLayer.run(); -} - -void ClConvolution2dFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp deleted file mode 100644 index 28ba53f38a..0000000000 --- a/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp +++ /dev/null @@ -1,35 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include -#include - -#include - -namespace armnn -{ - -class ClConvolution2dFloatWorkload : public FloatWorkload -{ -public: - ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr& memoryManager); - void Execute() const override; - -private: - mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; - - std::unique_ptr m_KernelTensor; - std::unique_ptr m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - diff --git a/src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp deleted file mode 100644 index a9faec0d77..0000000000 --- a/src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
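// Editor's sketch (not part of the moved files): the float convolution workload
// above follows the backend's constant-tensor lifecycle. Condensed to its essentials
// — the wrapper function is hypothetical, but BuildArmComputeTensor,
// InitializeArmComputeClTensorData and FreeTensorIfUnused are the real helpers these
// files use:
void PrepareConstantKernel(arm_compute::CLConvolutionLayer& function,
                           arm_compute::ICLTensor& input,
                           arm_compute::ICLTensor& output,
                           const arm_compute::PadStrideInfo& padStrideInfo,
                           const armnn::ConstCpuTensorHandle* weight,
                           std::unique_ptr<arm_compute::CLTensor>& kernelTensor)
{
    kernelTensor = std::make_unique<arm_compute::CLTensor>();                       // staging tensor
    armnn::armcomputetensorutils::BuildArmComputeTensor(*kernelTensor, weight->GetTensorInfo());

    function.configure(&input, kernelTensor.get(), nullptr, &output, padStrideInfo);

    armnn::InitializeArmComputeClTensorData(*kernelTensor, weight);                 // upload constants
    function.prepare();                          // ACL copies/reshapes into its own buffers
    armnn::FreeTensorIfUnused(kernelTensor);     // the staging copy is no longer needed
}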
-// SPDX-License-Identifier: MIT -// - -#include "ClConvolution2dUint8Workload.hpp" -#include -#include -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr& memoryManager) - : Uint8Workload(descriptor, info) - , m_ConvolutionLayer(memoryManager) -{ - // todo: check tensor shapes match - const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); - } - - m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - m_ConvolutionLayer.configure(&input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo); - - InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_ConvolutionLayer.prepare(); - FreeUnusedTensors(); -} - -void ClConvolution2dUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dUint8Workload_Execute"); - - m_ConvolutionLayer.run(); -} - -void ClConvolution2dUint8Workload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -} //namespace armnn - diff --git a/src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp deleted file mode 100644 index f1f008b1b9..0000000000 --- a/src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp +++ /dev/null @@ -1,35 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include -#include - -#include - -namespace armnn -{ - -class ClConvolution2dUint8Workload : public Uint8Workload -{ -public: - ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr& memoryManager); - void Execute() const override; - -private: - mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; - - std::unique_ptr m_KernelTensor; - std::unique_ptr m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn - diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp deleted file mode 100644 index 18f45bb852..0000000000 --- a/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp +++ /dev/null @@ -1,125 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClDepthwiseConvolutionBaseWorkload.hpp" - -#include "TypeUtils.hpp" - -#include -#include -#include -#include - -namespace armnn -{ - -using namespace armcomputetensorutils; - -arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional& biases) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); - - arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; - - if (descriptor.m_BiasEnabled) - { - BOOST_ASSERT(biases.is_initialized()); - - aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout); - optionalAclBiasesInfo = &aclBiasesInfo; - } - - const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); - const unsigned int aclDepthMultiplier = weights.GetShape()[0]; - - return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo, - &aclWeightsInfo, - optionalAclBiasesInfo, - &aclOutputInfo, - aclPadStrideInfo, - aclDepthMultiplier); -} - -template -ClDepthwiseConvolutionBaseWorkload::ClDepthwiseConvolutionBaseWorkload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload(descriptor, info) -{ - auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); - - m_KernelTensor = std::make_unique(); - BuildArmComputeTensor(*m_KernelTensor, weightInfo); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasTensor = std::make_unique(); - BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - } - - arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, - m_Data.m_Parameters.m_StrideY, - m_Data.m_Parameters.m_PadLeft, - m_Data.m_Parameters.m_PadRight, - m_Data.m_Parameters.m_PadTop, - m_Data.m_Parameters.m_PadBottom, - arm_compute::DimensionRoundingType::FLOOR); - - std::string name = std::string("ClDepthwiseConvolution") + - GetDataTypeName(m_Data.m_Weight->GetTensorInfo().GetDataType()) + "Workload"; - m_Data.ValidateInputsOutputs(name, 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - const unsigned int depthMultiplier = weightInfo.GetShape()[0]; - - //Check for optimisation opportunities. 
- bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3); - if (use3x3Optimisation) - { - m_DepthwiseConvolutionLayer = std::make_unique(); - static_cast(m_DepthwiseConvolutionLayer.get())->configure( - &input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo, - depthMultiplier); - } - else - { - m_DepthwiseConvolutionLayer = std::make_unique(); - static_cast(m_DepthwiseConvolutionLayer.get())->configure( - &input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo, - depthMultiplier); - } - - BOOST_ASSERT(m_DepthwiseConvolutionLayer); -} - -template -void ClDepthwiseConvolutionBaseWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_KernelTensor); - FreeTensorIfUnused(m_BiasTensor); -} - -// Generate known implementations for linker -template class ClDepthwiseConvolutionBaseWorkload; -template class ClDepthwiseConvolutionBaseWorkload; - -} // namespace armnn diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp deleted file mode 100644 index 49a8b5d357..0000000000 --- a/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include - -#include - -namespace armnn -{ - -arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const DepthwiseConvolution2dDescriptor& descriptor, - const TensorInfo& weights, - const boost::optional& biases); - -template -class ClDepthwiseConvolutionBaseWorkload : public TypedWorkload -{ -public: - using TypedWorkload::m_Data; - - ClDepthwiseConvolutionBaseWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - -protected: - std::unique_ptr m_DepthwiseConvolutionLayer; - - std::unique_ptr m_KernelTensor; - std::unique_ptr m_BiasTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp deleted file mode 100644 index bc3b165490..0000000000 --- a/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
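// Editor's sketch (not part of the moved files): the 3x3 fast path above picks the
// concrete ACL function at construction time and erases the choice behind
// arm_compute::IFunction, so Execute() is a single virtual run() call. The selection
// reduces to:
std::unique_ptr<arm_compute::IFunction> MakeDepthwiseLayer(bool use3x3Optimisation)
{
    if (use3x3Optimisation)
    {
        return std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
    }
    return std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
}
// configure() is not part of the IFunction interface, which is why the original
// code casts back to the concrete type before configuring.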
-// SPDX-License-Identifier: MIT -// - -#include "ClDepthwiseConvolutionFloatWorkload.hpp" - -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClDepthwiseConvolutionFloatWorkload::ClDepthwiseConvolutionFloatWorkload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : ClDepthwiseConvolutionBaseWorkload(descriptor, info) -{ - InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); - } - - m_DepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void ClDepthwiseConvolutionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionFloatWorkload_Execute"); - BOOST_ASSERT(m_DepthwiseConvolutionLayer); - - m_DepthwiseConvolutionLayer->run(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp deleted file mode 100644 index 4f9d5f332e..0000000000 --- a/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClDepthwiseConvolutionBaseWorkload.hpp" - -namespace armnn -{ - -class ClDepthwiseConvolutionFloatWorkload : public ClDepthwiseConvolutionBaseWorkload -{ -public: - ClDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - void Execute() const override; -}; - -} //namespace armnn - - - - diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp deleted file mode 100644 index 4ea5590486..0000000000 --- a/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClDepthwiseConvolutionUint8Workload.hpp" - -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload( - const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : ClDepthwiseConvolutionBaseWorkload(descriptor, info) -{ - InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); - - if (m_BiasTensor) - { - InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); - } - - m_DepthwiseConvolutionLayer->prepare(); - FreeUnusedTensors(); -} - -void ClDepthwiseConvolutionUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionUint8Workload_Execute"); - BOOST_ASSERT(m_DepthwiseConvolutionLayer); - - m_DepthwiseConvolutionLayer->run(); -} - -} //namespace armnn - diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp deleted file mode 100644 index b9f676de94..0000000000 --- a/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClDepthwiseConvolutionBaseWorkload.hpp" - -namespace armnn -{ - -class ClDepthwiseConvolutionUint8Workload : public ClDepthwiseConvolutionBaseWorkload -{ -public: - ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info); - void Execute() const override; -}; - -} //namespace armnn - - diff --git a/src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp b/src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp deleted file mode 100644 index 2371789035..0000000000 --- a/src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClDivisionFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output) -{ - const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput); -} - - -ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - // Construct - m_ArithmeticDivision.configure(&input0, &input1, &output); -} - -void ClDivisionFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute"); - - // Executes the layer. - m_ArithmeticDivision.run(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp b/src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp deleted file mode 100644 index d34e11dab8..0000000000 --- a/src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class ClDivisionFloatWorkload : public FloatWorkload -{ -public: - ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const - WorkloadInfo& info); - - using FloatWorkload::FloatWorkload; - void Execute() const override; - -private: - mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision; -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClFloorFloatWorkload.cpp b/src/backends/ClWorkloads/ClFloorFloatWorkload.cpp deleted file mode 100644 index d090a7da81..0000000000 --- a/src/backends/ClWorkloads/ClFloorFloatWorkload.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
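// Editor's sketch (not part of the moved files): ClDivisionWorkloadValidate above
// returns the raw ACL status; a layer-support wrapper (the name here is
// hypothetical) typically converts it to the bool-plus-reason form used elsewhere
// in this backend:
bool IsDivisionSupportedCl(const armnn::TensorInfo& input0,
                           const armnn::TensorInfo& input1,
                           const armnn::TensorInfo& output,
                           std::string* reasonIfUnsupported)
{
    const arm_compute::Status status = armnn::ClDivisionWorkloadValidate(input0, input1, output);
    const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
    if (!supported && reasonIfUnsupported)
    {
        *reasonIfUnsupported = status.error_description();
    }
    return supported;
}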
-// SPDX-License-Identifier: MIT
-//
-
-#include "ClFloorFloatWorkload.hpp"
-#include "backends/ClTensorHandle.hpp"
-
-#include "ClWorkloadUtils.hpp"
-
-namespace armnn
-{
-
-ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : FloatWorkload<FloorQueueDescriptor>(descriptor, info)
-{
-    m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1);
-
-    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
-
-    m_Layer.configure(&input, &output);
-}
-
-void ClFloorFloatWorkload::Execute() const
-{
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute");
-    m_Layer.run();
-}
-
-} //namespace armnn
diff --git a/src/backends/ClWorkloads/ClFloorFloatWorkload.hpp b/src/backends/ClWorkloads/ClFloorFloatWorkload.hpp
deleted file mode 100644
index f269bcf30c..0000000000
--- a/src/backends/ClWorkloads/ClFloorFloatWorkload.hpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "backends/Workload.hpp"
-
-#include <arm_compute/runtime/CL/CLFunctions.h>
-
-namespace armnn
-{
-
-class ClFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor>
-{
-public:
-    ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
-
-    void Execute() const override;
-
-private:
-    mutable arm_compute::CLFloor m_Layer;
-};
-
-} //namespace armnn
-
-
-
-
diff --git a/src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp b/src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp
deleted file mode 100644
index 1ec39926ad..0000000000
--- a/src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
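// Editor's note (illustrative): ClFloorFloatWorkload above is the simplest shape a
// CL workload takes. The FloatWorkload/Uint8Workload bases used throughout these
// files boil down to the following aliases — an assumption about the exact
// definitions in Workload.hpp, inferred from how they are used here:
//
//     template <typename QueueDescriptor>
//     using FloatWorkload = TypedWorkload<QueueDescriptor,
//                                         armnn::DataType::Float16, armnn::DataType::Float32>;
//
//     template <typename QueueDescriptor>
//     using Uint8Workload = TypedWorkload<QueueDescriptor, armnn::DataType::QuantisedAsymm8>;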
-// SPDX-License-Identifier: MIT -// - -#include "ClFullyConnectedWorkload.hpp" -#include -#include -#include -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); - - arm_compute::TensorInfo aclBiases; - arm_compute::TensorInfo *optionalAclBiases = nullptr; - if (descriptor.m_BiasEnabled) - { - aclBiases = BuildArmComputeTensorInfo(biases); - optionalAclBiases = &aclBiases; - } - - const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = - ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); - - return arm_compute::CLFullyConnectedLayer::validate(&aclInput, - &aclWeights, - optionalAclBiases, - &aclOutput, - fullyConnectedLayerInfo); -} - -ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr& memoryManager) - : BaseWorkload(descriptor, info) - , m_FullyConnectedLayer(memoryManager) -{ - m_WeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasesTensor = std::make_unique(); - BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); - } - - m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - // Construct - arm_compute::FullyConnectedLayerInfo fc_info; - fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; - m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); - - InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight); - - if (m_BiasesTensor) - { - InitializeArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_FullyConnectedLayer.prepare(); - FreeUnusedTensors(); -} - -void ClFullyConnectedWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute"); - m_FullyConnectedLayer.run(); -} - -void ClFullyConnectedWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_WeightsTensor); - FreeTensorIfUnused(m_BiasesTensor); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp b/src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp deleted file mode 100644 index a61610992e..0000000000 --- a/src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
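// Editor's sketch (not part of the moved files): the validate path above converts
// the descriptor with ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo,
// while the constructor fills fc_info by hand. A plausible minimal form of that
// helper, inferred from its usage (an assumption, not the actual implementation):
inline arm_compute::FullyConnectedLayerInfo
ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const armnn::FullyConnectedDescriptor& descriptor)
{
    arm_compute::FullyConnectedLayerInfo fc_info;
    fc_info.transpose_weights = descriptor.m_TransposeWeightMatrix;
    return fc_info;
}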
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include -#include - -#include - -namespace armnn -{ - -arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const TensorInfo& weights, - const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor); - -class ClFullyConnectedWorkload : public armnn::BaseWorkload -{ -public: - ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor, - const armnn::WorkloadInfo& info, - std::shared_ptr& memoryManager); - - using armnn::BaseWorkload::m_Data; - void Execute() const override; - -private: - mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer; - - std::unique_ptr m_WeightsTensor; - std::unique_ptr m_BiasesTensor; - - void FreeUnusedTensors(); -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp deleted file mode 100644 index 0ca334348e..0000000000 --- a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClL2NormalizationFloatWorkload.hpp" -#include -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const L2NormalizationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - - arm_compute::NormalizationLayerInfo normalizationInfo = - CreateAclNormalizationLayerInfoForL2Normalization(input); - - return arm_compute::CLNormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); -} - -ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); -} - -void ClL2NormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute"); - m_Layer.run(); -} - -} //namespace armnn - - - diff --git a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp deleted file mode 100644 index 20c0426208..0000000000 --- a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp +++ /dev/null @@ -1,35 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
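// Editor's note (illustrative): the L2 normalization above reuses the generic
// CLNormalizationLayer rather than a dedicated L2 function (see the comment in the
// header below). Per element, the intended effect is
//
//     output[c] = input[c] / sqrt( sum over k of input[k]^2 )
//
// with the sum taken across the channel dimension — which is what
// CreateAclNormalizationLayerInfoForL2Normalization(input) is assumed to encode as
// a cross-map normalization spanning the full depth.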
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const L2NormalizationDescriptor& descriptor); - -class ClL2NormalizationFloatWorkload : public FloatWorkload -{ -public: - ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - // Purposely not a CLL2Normalize function. See constructor. - mutable arm_compute::CLNormalizationLayer m_Layer; -}; - -} //namespace armnn - - - - diff --git a/src/backends/ClWorkloads/ClLstmFloatWorkload.cpp b/src/backends/ClWorkloads/ClLstmFloatWorkload.cpp deleted file mode 100644 index 9e2563a3a5..0000000000 --- a/src/backends/ClWorkloads/ClLstmFloatWorkload.cpp +++ /dev/null @@ -1,391 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClLstmFloatWorkload.hpp" -#include -#include -#include -#include - -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) - : FloatWorkload(descriptor, info) -{ - arm_compute::LSTMParams lstm_param; - - // Basic parameters - m_InputToForgetWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo()); - - m_InputToCellWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo()); - - m_InputToOutputWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo()); - - m_RecurrentToForgetWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo()); - - m_RecurrentToCellWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo()); - - m_RecurrentToOutputWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo()); - - m_ForgetGateBiasTensor = std::make_unique(); - BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo()); - - m_CellBiasTensor = std::make_unique(); - BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo()); - - m_OutputGateBiasTensor = std::make_unique(); - BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo()); - - // for future reference: check the AndroidNN API for the logic here - if (!m_Data.m_Parameters.m_CifgEnabled) - { - m_InputToInputWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo()); - - m_RecurrentToInputWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo()); - - m_CellToInputWeightsTensor = std::make_unique(); - if (m_Data.m_CellToInputWeights != nullptr) - { - BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo()); - } - - m_InputGateBiasTensor = std::make_unique(); - BuildArmComputeTensor(*m_InputGateBiasTensor, 
m_Data.m_InputGateBias->GetTensorInfo()); - - lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(), - m_RecurrentToInputWeightsTensor.get(), - m_Data.m_CellToInputWeights != nullptr ? m_CellToInputWeightsTensor.get() : nullptr, - m_InputGateBiasTensor.get()); - } - - if (m_Data.m_Parameters.m_ProjectionEnabled) - { - m_ProjectionWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo()); - - m_ProjectionBiasTensor = std::make_unique(); - if (m_Data.m_ProjectionBias != nullptr) - { - BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo()); - } - - lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(), - m_Data.m_ProjectionBias != nullptr ? m_ProjectionBiasTensor.get() : nullptr); - } - - if (m_Data.m_Parameters.m_PeepholeEnabled) - { - m_CellToForgetWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo()); - - m_CellToOutputWeightsTensor = std::make_unique(); - BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo()); - - lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get()); - } - - const arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - const arm_compute::ICLTensor& output_state_in = static_cast(m_Data.m_Inputs[1])->GetTensor(); - const arm_compute::ICLTensor& cell_state_in = static_cast(m_Data.m_Inputs[2])->GetTensor(); - - arm_compute::ICLTensor& output_state_out = static_cast(m_Data.m_Outputs[1])->GetTensor(); - arm_compute::ICLTensor& cell_state_out = static_cast(m_Data.m_Outputs[2])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[3])->GetTensor(); - - // Get the batch_size and the num_units from the cellStateIn dimensions - const TensorInfo& inputTensorInfo = info.m_InputTensorInfos[2]; - const unsigned int batch_size = boost::numeric_cast(inputTensorInfo.GetShape()[0]); - const unsigned int num_units = boost::numeric_cast(inputTensorInfo.GetShape()[1]); - - m_ScratchBuffer = std::make_unique(); - if (m_Data.m_Parameters.m_CifgEnabled) - { - // 2D tensor with dimensions [num_units * 4, batch_size] with CIFG - armnn::TensorInfo scratchBuffer1({ batch_size, num_units * 4 }, DataType::Float32); - BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer1); - } - else - { - // scratch_buffer [num_units * 3, batch_size] without CIFG - armnn::TensorInfo scratchBuffer2({ batch_size, num_units * 3 }, DataType::Float32); - BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer2); - } - - float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell; - float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj; - - // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations - arm_compute::ActivationLayerInfo activationLayerInfo; - if (m_Data.m_Parameters.m_ActivationFunc == 0) - { - // no activation, do nothing - } - else if (m_Data.m_Parameters.m_ActivationFunc == 1) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::RELU); - } - else if (m_Data.m_Parameters.m_ActivationFunc == 3) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); - } - else if (m_Data.m_Parameters.m_ActivationFunc == 4) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - 
arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); - } - else if (m_Data.m_Parameters.m_ActivationFunc == 6) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); - } - else - { - throw armnn::Exception("Wrong Type of Activation Function!"); - } - - - m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(), - m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(), - m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(), - m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), - &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out, - &cell_state_out, &output, lstm_param, activationLayerInfo, - cell_threshold, projection_threshold); - - armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer); - - InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights); - InitializeArmComputeClTensorData(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights); - InitializeArmComputeClTensorData(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights); - InitializeArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights); - InitializeArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights); - InitializeArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights); - InitializeArmComputeClTensorData(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias); - InitializeArmComputeClTensorData(*m_CellBiasTensor, m_Data.m_CellBias); - InitializeArmComputeClTensorData(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias); - - if (!m_Data.m_Parameters.m_CifgEnabled) - { - InitializeArmComputeClTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights); - InitializeArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights); - if (m_Data.m_CellToInputWeights != nullptr) - { - InitializeArmComputeClTensorData(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights); - } - InitializeArmComputeClTensorData(*m_InputGateBiasTensor, m_Data.m_InputGateBias); - } - - if (m_Data.m_Parameters.m_ProjectionEnabled) - { - InitializeArmComputeClTensorData(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights); - if (m_Data.m_ProjectionBias != nullptr) - { - InitializeArmComputeClTensorData(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias); - } - } - - if (m_Data.m_Parameters.m_PeepholeEnabled) - { - InitializeArmComputeClTensorData(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights); - InitializeArmComputeClTensorData(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights); - } - - // Force Compute Library to perform the necessary copying and reshaping, after which - // delete all the input tensors that will no longer be needed - m_LstmLayer.prepare(); - FreeUnusedTensors(); -} - -void ClLstmFloatWorkload::Execute() const -{ - m_LstmLayer.run(); -} - -arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor& descriptor, - const TensorInfo& inputToForgetWeights, - const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, - const TensorInfo& 
recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, - const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, - const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, - const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, - const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, - const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights) -{ - arm_compute::LSTMParams lstm_params_info; - - // The inputs and the outputs - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn); - const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn); - const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer); - const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut); - const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - // Basic parameters - const arm_compute::TensorInfo aclInputToForgetWeightsInfo = BuildArmComputeTensorInfo(inputToForgetWeights); - const arm_compute::TensorInfo aclInputToCellWeightsInfo = BuildArmComputeTensorInfo(inputToCellWeights); - const arm_compute::TensorInfo aclInputToOutputWeightsInfo = BuildArmComputeTensorInfo(inputToOutputWeights); - const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo - = BuildArmComputeTensorInfo(recurrentToForgetWeights); - const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo - = BuildArmComputeTensorInfo(recurrentToCellWeights); - const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo - = BuildArmComputeTensorInfo(recurrentToOutputWeights); - const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(forgetGateBias); - const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(cellBias); - const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(outputGateBias); - - arm_compute::TensorInfo aclInputToInputWeightsInfo; - arm_compute::TensorInfo aclRecurrentToInputWeightsInfo; - arm_compute::TensorInfo aclCellToInputWeightsInfo; - arm_compute::TensorInfo aclInputGateBiasInfo; - arm_compute::TensorInfo aclProjectionWeightsInfo; - arm_compute::TensorInfo aclProjectionBiasInfo; - arm_compute::TensorInfo aclCellToForgetWeightsInfo; - arm_compute::TensorInfo aclCellToOutputWeightsInfo; - - if (!descriptor.m_CifgEnabled) - { - armnn::TensorInfo inputToInputWInfo = *inputToInputWeights; - aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(inputToInputWInfo); - armnn::TensorInfo recurrentToInputWInfo = *recurrentToInputWeights; - aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(recurrentToInputWInfo); - - if (cellToInputWeights != nullptr) - { - armnn::TensorInfo cellToInputWInfo = *cellToInputWeights; - aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(cellToInputWInfo); - } - armnn::TensorInfo inputGateBiasInfo = *inputGateBias; - aclInputGateBiasInfo = BuildArmComputeTensorInfo(inputGateBiasInfo); - lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo, - cellToInputWeights != nullptr ? 
&aclCellToInputWeightsInfo: nullptr, - &aclInputGateBiasInfo); - } - - if (descriptor.m_ProjectionEnabled) - { - const armnn::TensorInfo& projectionWInfo = *projectionWeights; - aclProjectionWeightsInfo = BuildArmComputeTensorInfo(projectionWInfo); - - if (projectionBias != nullptr) - { - const armnn::TensorInfo& projectionBiasInfo = *projectionBias; - aclProjectionBiasInfo = BuildArmComputeTensorInfo(projectionBiasInfo); - } - lstm_params_info.set_projection_params(&aclProjectionWeightsInfo, - projectionBias != nullptr ? &aclProjectionBiasInfo: nullptr); - } - - if (descriptor.m_PeepholeEnabled) - { - const armnn::TensorInfo& cellToForgetWInfo = *cellToForgetWeights; - aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(cellToForgetWInfo); - const armnn::TensorInfo& cellToOutputWInfo = *cellToOutputWeights; - aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(cellToOutputWInfo); - lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo); - } - - float cell_threshold = descriptor.m_ClippingThresCell; - float projection_threshold = descriptor.m_ClippingThresProj; - - // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations - arm_compute::ActivationLayerInfo activationLayerInfo; - if (descriptor.m_ActivationFunc == 0) - { - // no activation, do nothing - } - else if (descriptor.m_ActivationFunc == 1) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::RELU); - } - else if (descriptor.m_ActivationFunc == 3) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); - } - else if (descriptor.m_ActivationFunc == 4) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); - } - else if (descriptor.m_ActivationFunc == 6) - { - activationLayerInfo = arm_compute::ActivationLayerInfo( - arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); - } - else - { - throw armnn::Exception("Wrong Type of Activation Function!"); - } - - return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo, - &aclInputToCellWeightsInfo, - &aclInputToOutputWeightsInfo, - &aclRecurrentToForgetWeightsInfo, - &aclRecurrentToCellWeightsInfo, - &aclRecurrentToOutputWeightsInfo, - &aclForgetGateBiasInfo, - &aclCellBiasInfo, - &aclOutputGateBiasInfo, - &aclOutputStateInInfo, &aclCellStateInInfo, - &aclScratchBufferInfo, &aclOutputStateOutInfo, - &aclCellStateOutInfo, &aclOutputInfo, - lstm_params_info, activationLayerInfo, - cell_threshold, projection_threshold); -} - -void ClLstmFloatWorkload::FreeUnusedTensors() -{ - FreeTensorIfUnused(m_InputToInputWeightsTensor); - FreeTensorIfUnused(m_InputToForgetWeightsTensor); - FreeTensorIfUnused(m_InputToCellWeightsTensor); - FreeTensorIfUnused(m_InputToOutputWeightsTensor); - FreeTensorIfUnused(m_RecurrentToInputWeightsTensor); - FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor); - FreeTensorIfUnused(m_RecurrentToCellWeightsTensor); - FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor); - FreeTensorIfUnused(m_CellToInputWeightsTensor); - FreeTensorIfUnused(m_CellToForgetWeightsTensor); - FreeTensorIfUnused(m_CellToOutputWeightsTensor); - FreeTensorIfUnused(m_InputGateBiasTensor); - FreeTensorIfUnused(m_ForgetGateBiasTensor); - FreeTensorIfUnused(m_CellBiasTensor); - FreeTensorIfUnused(m_OutputGateBiasTensor); - 
FreeTensorIfUnused(m_ProjectionWeightsTensor); - FreeTensorIfUnused(m_ProjectionBiasTensor); - FreeTensorIfUnused(m_ScratchBuffer); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClLstmFloatWorkload.hpp b/src/backends/ClWorkloads/ClLstmFloatWorkload.hpp deleted file mode 100644 index 61d8fc3e6c..0000000000 --- a/src/backends/ClWorkloads/ClLstmFloatWorkload.hpp +++ /dev/null @@ -1,68 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -#include - -namespace armnn -{ - -class ClLstmFloatWorkload : public FloatWorkload -{ -public: - ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLLSTMLayer m_LstmLayer; - - std::unique_ptr m_InputToInputWeightsTensor; - std::unique_ptr m_InputToForgetWeightsTensor; - std::unique_ptr m_InputToCellWeightsTensor; - std::unique_ptr m_InputToOutputWeightsTensor; - std::unique_ptr m_RecurrentToInputWeightsTensor; - std::unique_ptr m_RecurrentToForgetWeightsTensor; - std::unique_ptr m_RecurrentToCellWeightsTensor; - std::unique_ptr m_RecurrentToOutputWeightsTensor; - std::unique_ptr m_CellToInputWeightsTensor; - std::unique_ptr m_CellToForgetWeightsTensor; - std::unique_ptr m_CellToOutputWeightsTensor; - std::unique_ptr m_InputGateBiasTensor; - std::unique_ptr m_ForgetGateBiasTensor; - std::unique_ptr m_CellBiasTensor; - std::unique_ptr m_OutputGateBiasTensor; - std::unique_ptr m_ProjectionWeightsTensor; - std::unique_ptr m_ProjectionBiasTensor; - - std::unique_ptr m_ScratchBuffer; - - void FreeUnusedTensors(); -}; - -arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, - const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, - const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, - const TensorInfo& output, const LstmDescriptor &descriptor, - const TensorInfo& inputToForgetWeights, - const TensorInfo& inputToCellWeights, - const TensorInfo& inputToOutputWeights, - const TensorInfo& recurrentToForgetWeights, - const TensorInfo& recurrentToCellWeights, - const TensorInfo& recurrentToOutputWeights, - const TensorInfo& forgetGateBias, const TensorInfo& cellBias, - const TensorInfo& outputGateBias, - const TensorInfo* inputToInputWeights, - const TensorInfo* recurrentToInputWeights, - const TensorInfo* cellToInputWeights, - const TensorInfo* inputGateBias, - const TensorInfo* projectionWeights, - const TensorInfo* projectionBias, - const TensorInfo* cellToForgetWeights, - const TensorInfo* cellToOutputWeights); -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClMergerFloatWorkload.cpp b/src/backends/ClWorkloads/ClMergerFloatWorkload.cpp deleted file mode 100644 index 151f1e0ee7..0000000000 --- a/src/backends/ClWorkloads/ClMergerFloatWorkload.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
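// Editor's sketch (not part of the moved files): the ActivationFunc-to-ACL mapping
// appears twice in the LSTM code above (constructor and validate). A factored
// helper would look like this; the numeric codes follow the Android NN convention
// the comments reference (0 = none, 1 = ReLU, 3 = ReLU6, 4 = tanh, 6 = sigmoid):
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunc)
{
    using AclActivation = arm_compute::ActivationLayerInfo::ActivationFunction;
    switch (activationFunc)
    {
        case 0: return arm_compute::ActivationLayerInfo();      // no activation
        case 1: return arm_compute::ActivationLayerInfo(AclActivation::RELU);
        case 3: return arm_compute::ActivationLayerInfo(AclActivation::BOUNDED_RELU, 6.0f);
        case 4: return arm_compute::ActivationLayerInfo(AclActivation::TANH, 1.0f, 1.0f);
        case 6: return arm_compute::ActivationLayerInfo(AclActivation::LOGISTIC);
        default: throw armnn::Exception("Wrong Type of Activation Function!");
    }
}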
-// SPDX-License-Identifier: MIT
-//
-
-#include "ClMergerFloatWorkload.hpp"
-
-#include "ClWorkloadUtils.hpp"
-
-namespace armnn
-{
-
-void ClMergerFloatWorkload::Execute() const
-{
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerFloatWorkload_Execute");
-    ClBaseMergerWorkload::Execute();
-}
-
-} //namespace armnn
-
diff --git a/src/backends/ClWorkloads/ClMergerFloatWorkload.hpp b/src/backends/ClWorkloads/ClMergerFloatWorkload.hpp
deleted file mode 100644
index 9782f7a8f3..0000000000
--- a/src/backends/ClWorkloads/ClMergerFloatWorkload.hpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "ClBaseMergerWorkload.hpp"
-
-namespace armnn
-{
-
-class ClMergerFloatWorkload : public ClBaseMergerWorkload<armnn::DataType::Float16, armnn::DataType::Float32>
-{
-public:
-    using ClBaseMergerWorkload<armnn::DataType::Float16, armnn::DataType::Float32>::ClBaseMergerWorkload;
-    virtual void Execute() const override;
-};
-
-} //namespace armnn
-
-
diff --git a/src/backends/ClWorkloads/ClMergerUint8Workload.cpp b/src/backends/ClWorkloads/ClMergerUint8Workload.cpp
deleted file mode 100644
index 9d1060d857..0000000000
--- a/src/backends/ClWorkloads/ClMergerUint8Workload.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "ClMergerUint8Workload.hpp"
-
-#include "ClWorkloadUtils.hpp"
-
-namespace armnn
-{
-
-void ClMergerUint8Workload::Execute() const
-{
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerUint8Workload_Execute");
-    ClBaseMergerWorkload::Execute();
-}
-
-} //namespace armnn
diff --git a/src/backends/ClWorkloads/ClMergerUint8Workload.hpp b/src/backends/ClWorkloads/ClMergerUint8Workload.hpp
deleted file mode 100644
index cbfc19a0f2..0000000000
--- a/src/backends/ClWorkloads/ClMergerUint8Workload.hpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "ClBaseMergerWorkload.hpp"
-
-namespace armnn
-{
-
-class ClMergerUint8Workload : public ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>
-{
-public:
-    using ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>::ClBaseMergerWorkload;
-    virtual void Execute() const override;
-};
-
-} //namespace armnn
-
diff --git a/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp
deleted file mode 100644
index c3330a98e8..0000000000
--- a/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "ClMultiplicationFloatWorkload.hpp"
-#include "backends/ClTensorHandle.hpp"
-#include "backends/CpuTensorHandle.hpp"
-#include "ClWorkloadUtils.hpp"
-
-namespace armnn
-{
-
-arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
-                                                     const TensorInfo& input1,
-                                                     const TensorInfo& output)
-{
-    const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
-    const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
-    const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
-
-    // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
-    // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
-    // ignored for F32 tensors.
- return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1, - &aclInput2, - &aclOutput, - 1.0f, - arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_ZERO); -} - - -ClMultiplicationFloatWorkload::ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClMultiplicationFloatWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - // Construct - m_PixelWiseMultiplication.configure(&input0, - &input1, - &output, - 1.0f, - arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_NEAREST_EVEN); -} - -void ClMultiplicationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationFloatWorkload_Execute"); - - // Executes the layer. - m_PixelWiseMultiplication.run(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp deleted file mode 100644 index c2d6b7697a..0000000000 --- a/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp +++ /dev/null @@ -1,34 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output); - -class ClMultiplicationFloatWorkload : public FloatWorkload -{ -public: - ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); - - using FloatWorkload::FloatWorkload; - void Execute() const override; - -private: - mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication; -}; - -} //namespace armnn - - - diff --git a/src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp deleted file mode 100644 index d5863b444c..0000000000 --- a/src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
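// Editor's note (illustrative): element-wise, CLPixelWiseMultiplication computes
//
//     output = input0 * input1 * scale
//
// and the workload above fixes scale at 1.0f. The rounding policy only matters for
// integer outputs; per the comment in the validate function, TO_ZERO is the one
// policy the F32 validation accepts at scale 1.0, while the TO_NEAREST_EVEN passed
// to configure() is effectively ignored for F32 tensors.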
-// SPDX-License-Identifier: MIT -// - -#include "ClNormalizationFloatWorkload.hpp" -#include -#include -#include -#include -#include -#include "ClWorkloadUtils.hpp" - -using namespace armnn::armcomputetensorutils; - -namespace armnn -{ - -arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); - - arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor); - - return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); -} - -ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters); - - m_NormalizationLayer.configure(&input, &output, normalizationInfo); -}; - -void ClNormalizationFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute"); - m_NormalizationLayer.run(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp deleted file mode 100644 index f02d0adb70..0000000000 --- a/src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const NormalizationDescriptor& descriptor); - -class ClNormalizationFloatWorkload : public FloatWorkload -{ -public: - ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLNormalizationLayer m_NormalizationLayer; -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClPadWorkload.cpp b/src/backends/ClWorkloads/ClPadWorkload.cpp deleted file mode 100644 index 45a9d0dc44..0000000000 --- a/src/backends/ClWorkloads/ClPadWorkload.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClPadWorkload.hpp" - -#include "backends/ClTensorHandle.hpp" -#include "backends/aclCommon/ArmComputeTensorUtils.hpp" -#include "ClWorkloadUtils.hpp" -#include - -namespace armnn -{ -using namespace armcomputetensorutils; - -template -ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info) -: TypedWorkload(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); - arm_compute::PaddingList padList = static_cast(descriptor.m_Parameters.m_PadList); - - m_Layer.configure(&input, &output, padList); -} - -template -void ClPadWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPadWorkload_Execute"); - m_Layer.run(); -} - -bool ClPadValidate(const TensorInfo& input, - const TensorInfo& output, - const PadDescriptor& descriptor, - std::string* reasonIfUnsupported) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - arm_compute::PaddingList padList = static_cast(descriptor.m_PadList); - - const arm_compute::Status aclStatus = arm_compute::CLPadLayer::validate(&aclInputInfo, - &aclOutputInfo, - padList); - - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - - return supported; -} - -} // namespace armnn - -template class armnn::ClPadWorkload; -template class armnn::ClPadWorkload; diff --git a/src/backends/ClWorkloads/ClPadWorkload.hpp b/src/backends/ClWorkloads/ClPadWorkload.hpp deleted file mode 100644 index 0ec560d545..0000000000 --- a/src/backends/ClWorkloads/ClPadWorkload.hpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/WorkloadData.hpp" -#include "backends/Workload.hpp" -#include - -namespace armnn { - -template -class ClPadWorkload : public TypedWorkload -{ -public: - ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLPadLayer m_Layer; -}; - -bool ClPadValidate(const TensorInfo& input, - const TensorInfo& output, - const PadDescriptor& descriptor, - std::string* reasonIfUnsupported); - -} //namespace armnn - diff --git a/src/backends/ClWorkloads/ClPermuteWorkload.cpp b/src/backends/ClWorkloads/ClPermuteWorkload.cpp deleted file mode 100644 index 11b0df80dd..0000000000 --- a/src/backends/ClWorkloads/ClPermuteWorkload.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClPermuteWorkload.hpp" -#include -#include - -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor) -{ - const armnn::PermutationVector& perm = descriptor.m_DimMappings; - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!perm.IsEqual({ 0U, 3U, 1U, 2U }) - && !perm.IsEqual({ 0U, 2U, 3U, 1U }) - && !perm.IsEqual({ 3U, 2U, 0U, 1U }), - "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported"); - - return arm_compute::Status{}; -} - -template -ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload(descriptor, info) -{ - using armcomputetensorutils::BuildArmComputePermutationVector; - - m_Data.ValidateInputsOutputs(GetName(), 1, 1); - - const arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; - - // Run the layer. - m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); -} - -template -void ClPermuteWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute"); - m_PermuteFunction.run(); -} - -template class ClPermuteWorkload; -template class ClPermuteWorkload; - -} // namespace armnn diff --git a/src/backends/ClWorkloads/ClPermuteWorkload.hpp b/src/backends/ClWorkloads/ClPermuteWorkload.hpp deleted file mode 100644 index a1f3161921..0000000000 --- a/src/backends/ClWorkloads/ClPermuteWorkload.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" - -#include -#include - -#include - -namespace armnn -{ - -arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor); - -template -class ClPermuteWorkload : public TypedWorkload -{ -public: - static const std::string& GetName() - { - static const std::string name = std::string("ClPermuteWorkload"); - return name; - } - - ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - using TypedWorkload::m_Data; - mutable arm_compute::CLPermute m_PermuteFunction; -}; - -using ClPermuteFloatWorkload = ClPermuteWorkload; -using ClPermuteUint8Workload = ClPermuteWorkload; - -} // namespace armnn diff --git a/src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp deleted file mode 100644 index 57b056a05c..0000000000 --- a/src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClPooling2dBaseWorkload.hpp" -#include -#include -#include -#include - -namespace armnn -{ -using namespace armcomputetensorutils; - -arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor) -{ - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); - - return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); -} - -template -ClPooling2dBaseWorkload::ClPooling2dBaseWorkload( - const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) - : TypedWorkload(descriptor, info) -{ - m_Data.ValidateInputsOutputs(name, 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); - - // Run the layer. - m_PoolingLayer.configure(&input, &output, layerInfo); -} - -template class ClPooling2dBaseWorkload; -template class ClPooling2dBaseWorkload; - -} diff --git a/src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp deleted file mode 100644 index ea7ddfb41b..0000000000 --- a/src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, - const TensorInfo& output, - const Pooling2dDescriptor& descriptor); - -// Base class template providing an implementation of the Pooling2d layer common to all data types. -template -class ClPooling2dBaseWorkload : public TypedWorkload -{ -public: - using TypedWorkload::m_Data; - - ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, - const std::string& name); - -protected: - mutable arm_compute::CLPoolingLayer m_PoolingLayer; -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp deleted file mode 100644 index dc9d17f0ae..0000000000 --- a/src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClPooling2dFloatWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClPooling2dFloatWorkload::ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : ClPooling2dBaseWorkload(descriptor, info, "ClPooling2dFloatWorkload") -{ -} - -void ClPooling2dFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dFloatWorkload_Execute"); - m_PoolingLayer.run(); -} - -} //namespace armnn - diff --git a/src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp deleted file mode 100644 index 71648d40f4..0000000000 --- a/src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include "ClPooling2dBaseWorkload.hpp" - -namespace armnn -{ -class ClPooling2dFloatWorkload : public ClPooling2dBaseWorkload -{ -public: - ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp b/src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp deleted file mode 100644 index 0b4b15f806..0000000000 --- a/src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClPooling2dUint8Workload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, - const WorkloadInfo& info) - : ClPooling2dBaseWorkload(descriptor, info, "ClPooling2dUint8Workload") -{ -} - -void ClPooling2dUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dUint8Workload_Execute"); - m_PoolingLayer.run(); -} - -} //namespace armnn - - diff --git a/src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp b/src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp deleted file mode 100644 index 2baf2aa708..0000000000 --- a/src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include "ClPooling2dBaseWorkload.hpp" - -namespace armnn -{ - -class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload -{ -public: - ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -}; - -} //namespace armnn - - diff --git a/src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp b/src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp deleted file mode 100644 index ea50436a66..0000000000 --- a/src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClReshapeFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClReshapeFloatWorkload::ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) - : FloatWorkload(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClReshapeFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - m_Layer.configure(&input, &output); -} - -void ClReshapeFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeFloatWorkload_Execute"); - m_Layer.run(); -} - -} //namespace armnn - diff --git a/src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp b/src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp deleted file mode 100644 index 48265143e5..0000000000 --- a/src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -class ClReshapeFloatWorkload : public FloatWorkload -{ -public: - ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLReshapeLayer m_Layer; -}; - -} //namespace armnn - - diff --git a/src/backends/ClWorkloads/ClReshapeUint8Workload.cpp b/src/backends/ClWorkloads/ClReshapeUint8Workload.cpp deleted file mode 100644 index 82bd93ef9c..0000000000 --- a/src/backends/ClWorkloads/ClReshapeUint8Workload.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClReshapeUint8Workload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) - : Uint8Workload(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output); -} - -void ClReshapeUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeUint8Workload_Execute"); - - m_Layer.run(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClReshapeUint8Workload.hpp b/src/backends/ClWorkloads/ClReshapeUint8Workload.hpp deleted file mode 100644 index c9801a3ae1..0000000000 --- a/src/backends/ClWorkloads/ClReshapeUint8Workload.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -// Reshape -class ClReshapeUint8Workload : public Uint8Workload -{ -public: - ClReshapeUint8Workload( const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLReshapeLayer m_Layer; -}; - -} //namespace armnn - - diff --git a/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp deleted file mode 100644 index 7c05bbf33d..0000000000 --- a/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "ClResizeBilinearFloatWorkload.hpp" -#include -#include -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClResizeBilinearFloatWorkload::ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, - const WorkloadInfo& info) - : FloatWorkload(descriptor, info) -{ - m_Data.ValidateInputsOutputs("ClResizeBilinearFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR, - arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f), - arm_compute::SamplingPolicy::TOP_LEFT); -}; - -void ClResizeBilinearFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeBilinearFloatWorkload_Execute"); - m_ResizeBilinearLayer.run(); -} - - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp deleted file mode 100644 index f2ee67f5dd..0000000000 --- a/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include - -namespace armnn -{ - -class ClResizeBilinearFloatWorkload : public FloatWorkload -{ -public: - ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info); - void Execute() const override; - -private: - mutable arm_compute::CLScale m_ResizeBilinearLayer; -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp deleted file mode 100644 index eb05a19670..0000000000 --- a/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSoftmaxBaseWorkload.hpp" - -#include - -#include - -namespace armnn -{ - -arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output) -{ - // NOTE: We report 4D Softmax as unsupported until full support is added to ACL - if(input.GetShape().GetNumDimensions() >= 4u) - { - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); - } - - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - - return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo); -} - -} diff --git a/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp deleted file mode 100644 index b800056cdf..0000000000 --- a/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp +++ /dev/null @@ -1,17 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include -#include - -namespace armnn -{ - -arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, - const TensorInfo& output); - -} // namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp deleted file mode 100644 index c34b5a2a74..0000000000 --- a/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSoftmaxFloatWorkload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClSoftmaxFloatWorkload::ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr& memoryManager) - : FloatWorkload(descriptor, info) - , m_SoftmaxLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("ClSoftmaxFloatWorkload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); -} - -void ClSoftmaxFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxFloatWorkload_Execute"); - m_SoftmaxLayer.run(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp deleted file mode 100644 index 965b845cf8..0000000000 --- a/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include - -namespace armnn -{ - -class ClSoftmaxFloatWorkload : public FloatWorkload -{ -public: - ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr& memoryManager); - void Execute() const override; - -private: - mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; -}; - -} //namespace armnn - diff --git a/src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp deleted file mode 100644 index 1bb9628d74..0000000000 --- a/src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSoftmaxUint8Workload.hpp" -#include "backends/ClTensorHandle.hpp" -#include "backends/CpuTensorHandle.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr& memoryManager) - : Uint8Workload(descriptor, info) - , m_SoftmaxLayer(memoryManager) -{ - m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1); - - arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - - const auto outputQuantization = output.info()->quantization_info(); - - if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) - { - throw InvalidArgumentException( - "Invalid quantization for output. 
Only scale = 1.0f / 256.0f and offset = 0 supported"); - } - - m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); -} - -void ClSoftmaxUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxUint8Workload_Execute"); - - m_SoftmaxLayer.run(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp deleted file mode 100644 index 29427a5976..0000000000 --- a/src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include -#include "arm_compute/runtime/MemoryManagerOnDemand.h" - -#include - -namespace armnn -{ -// Softmax -class ClSoftmaxUint8Workload : public Uint8Workload -{ -public: - ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr& memoryManager); - - void Execute() const override; -private: - - mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; -}; - -} //namespace armnn - diff --git a/src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp b/src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp deleted file mode 100644 index 5fd634bdb6..0000000000 --- a/src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSplitterFloatWorkload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClSplitterFloatWorkload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterFloatWorkload_Execute"); - ClBaseSplitterWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp b/src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp deleted file mode 100644 index a0b5846f8e..0000000000 --- a/src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseSplitterWorkload.hpp" - -namespace armnn -{ - -class ClSplitterFloatWorkload : public ClBaseSplitterWorkload -{ -public: - using ClBaseSplitterWorkload::ClBaseSplitterWorkload; - virtual void Execute() const override; -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSplitterUint8Workload.cpp b/src/backends/ClWorkloads/ClSplitterUint8Workload.cpp deleted file mode 100644 index 50a251ada7..0000000000 --- a/src/backends/ClWorkloads/ClSplitterUint8Workload.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSplitterUint8Workload.hpp" - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ - -void ClSplitterUint8Workload::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterUint8Workload_Execute"); - ClBaseSplitterWorkload::Execute(); -} - -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSplitterUint8Workload.hpp b/src/backends/ClWorkloads/ClSplitterUint8Workload.hpp deleted file mode 100644 index 19e8be5034..0000000000 --- a/src/backends/ClWorkloads/ClSplitterUint8Workload.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "ClBaseSplitterWorkload.hpp" - -namespace armnn -{ -class ClSplitterUint8Workload : public ClBaseSplitterWorkload<DataType::QuantisedAsymm8> -{ -public: - using ClBaseSplitterWorkload<DataType::QuantisedAsymm8>::ClBaseSplitterWorkload; - virtual void Execute() const override; -}; -} //namespace armnn - - - diff --git a/src/backends/ClWorkloads/ClSubtractionWorkload.cpp b/src/backends/ClWorkloads/ClSubtractionWorkload.cpp deleted file mode 100644 index e5d5702389..0000000000 --- a/src/backends/ClWorkloads/ClSubtractionWorkload.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "ClSubtractionWorkload.hpp" - -#include -#include -#include - -#include "ClWorkloadUtils.hpp" - -namespace armnn -{ -using namespace armcomputetensorutils; - -static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; - -template <armnn::DataType... dataTypes> -ClSubtractionWorkload<dataTypes...>::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, - const WorkloadInfo& info) - : TypedWorkload<SubtractionQueueDescriptor, dataTypes...>(descriptor, info) -{ - this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1); - - arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor(); - arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor(); - arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); -} - -template <armnn::DataType... dataTypes> -void ClSubtractionWorkload<dataTypes...>::Execute() const -{ - ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute"); - m_Layer.run(); -} - -bool ClSubtractionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported) -{ - const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); - const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - - const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info, - &aclInput1Info, - &aclOutputInfo, - g_AclConvertPolicy); - - const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); - if (!supported && reasonIfUnsupported) - { - *reasonIfUnsupported = aclStatus.error_description(); - } - - return supported; -} - -} //namespace armnn - -template class armnn::ClSubtractionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>; -template class armnn::ClSubtractionWorkload<armnn::DataType::QuantisedAsymm8>; diff --git a/src/backends/ClWorkloads/ClSubtractionWorkload.hpp b/src/backends/ClWorkloads/ClSubtractionWorkload.hpp deleted file mode 100644 index 59a5f01e73..0000000000 --- a/src/backends/ClWorkloads/ClSubtractionWorkload.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "backends/Workload.hpp" - -#include <arm_compute/runtime/CL/CLFunctions.h> - -namespace armnn -{ - -template <armnn::DataType... dataTypes> -class ClSubtractionWorkload : public TypedWorkload<SubtractionQueueDescriptor, dataTypes...> -{ -public: - ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info); - - void Execute() const override; - -private: - mutable arm_compute::CLArithmeticSubtraction m_Layer; -}; - -bool ClSubtractionValidate(const TensorInfo& input0, - const TensorInfo& input1, - const TensorInfo& output, - std::string* reasonIfUnsupported); -} //namespace armnn diff --git a/src/backends/ClWorkloads/ClWorkloadUtils.hpp b/src/backends/ClWorkloads/ClWorkloadUtils.hpp deleted file mode 100644 index 3a8ff00bb6..0000000000 --- a/src/backends/ClWorkloads/ClWorkloadUtils.hpp +++ /dev/null @@ -1,63 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// -#pragma once - -#include "OpenClTimer.hpp" -#include -#include - -#include - -#define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \ - ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \ - name, \ - armnn::OpenClTimer(), \ - armnn::WallClockTimer()) - -namespace armnn -{ - -template <typename T> -void CopyArmComputeClTensorData(arm_compute::CLTensor& dstTensor, const T* srcData) -{ - { - ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting"); - dstTensor.map(true); - } - - { - ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor"); - armcomputetensorutils::CopyArmComputeITensorData(srcData, dstTensor); - } - - dstTensor.unmap(); -} - -inline void InitializeArmComputeClTensorData(arm_compute::CLTensor& clTensor, - const ConstCpuTensorHandle* handle) -{ - BOOST_ASSERT(handle); - - armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor); - switch(handle->GetTensorInfo().GetDataType()) - { - case DataType::Float16: - CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<Half>()); - break; - case DataType::Float32: - CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>()); - break; - case DataType::QuantisedAsymm8: - CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>()); - break; - case DataType::Signed32: - CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>()); - break; - default: - BOOST_ASSERT_MSG(false, "Unexpected tensor type."); - } -}; - -} //namespace armnn diff --git a/src/backends/ClWorkloads/backend.cmake b/src/backends/ClWorkloads/backend.cmake deleted file mode 100644 index e6b4673781..0000000000 --- a/src/backends/ClWorkloads/backend.cmake +++ /dev/null @@ -1,11 +0,0 @@ -# -# Copyright © 2017 Arm Ltd. All rights reserved. -# SPDX-License-Identifier: MIT -# - -if(ARMCOMPUTECL) - add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/ClWorkloads) - list(APPEND armnnLibraries armnnClBackend) -else() - message("CL backend is disabled") -endif() diff --git a/src/backends/ClWorkloads/backend.mk b/src/backends/ClWorkloads/backend.mk deleted file mode 100644 index 9ac5004f64..0000000000 --- a/src/backends/ClWorkloads/backend.mk +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright © 2017 ARM Ltd. All rights reserved.
-# SPDX-License-Identifier: MIT -# - -# BACKEND_SOURCES contains the list of files to be included -# in the Android build and it is picked up by the Android.mk -# file in the root of ArmNN - -BACKEND_SOURCES := \ - ClActivationFloatWorkload.cpp \ - ClActivationUint8Workload.cpp \ - ClAdditionWorkload.cpp \ - ClSubtractionWorkload.cpp \ - ClBaseConstantWorkload.cpp \ - ClBatchNormalizationFloatWorkload.cpp \ - ClConstantFloatWorkload.cpp \ - ClConstantUint8Workload.cpp \ - ClConvertFp16ToFp32Workload.cpp \ - ClConvertFp32ToFp16Workload.cpp \ - ClConvolution2dBaseWorkload.cpp \ - ClConvolution2dFloatWorkload.cpp \ - ClConvolution2dUint8Workload.cpp \ - ClDepthwiseConvolutionBaseWorkload.cpp \ - ClDepthwiseConvolutionFloatWorkload.cpp \ - ClDepthwiseConvolutionUint8Workload.cpp \ - ClDivisionFloatWorkload.cpp \ - ClFloorFloatWorkload.cpp \ - ClFullyConnectedWorkload.cpp \ - ClL2NormalizationFloatWorkload.cpp \ - ClLstmFloatWorkload.cpp \ - ClMergerFloatWorkload.cpp \ - ClMergerUint8Workload.cpp \ - ClMultiplicationFloatWorkload.cpp \ - ClNormalizationFloatWorkload.cpp \ - ClPadWorkload.cpp \ - ClPermuteWorkload.cpp \ - ClPooling2dBaseWorkload.cpp \ - ClPooling2dFloatWorkload.cpp \ - ClPooling2dUint8Workload.cpp \ - ClReshapeFloatWorkload.cpp \ - ClReshapeUint8Workload.cpp \ - ClResizeBilinearFloatWorkload.cpp \ - ClSoftmaxBaseWorkload.cpp \ - ClSoftmaxFloatWorkload.cpp \ - ClSoftmaxUint8Workload.cpp \ - ClSplitterFloatWorkload.cpp \ - ClSplitterUint8Workload.cpp - diff --git a/src/backends/MemCopyWorkload.cpp b/src/backends/MemCopyWorkload.cpp index 75271a09de..29f629ae50 100644 --- a/src/backends/MemCopyWorkload.cpp +++ b/src/backends/MemCopyWorkload.cpp @@ -3,8 +3,8 @@ // SPDX-License-Identifier: MIT // #include "MemCopyWorkload.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "TypeUtils.hpp" +#include "CpuTensorHandle.hpp" +#include #include #include diff --git a/src/backends/MemCopyWorkload.hpp b/src/backends/MemCopyWorkload.hpp index 5227f32c9f..782ce835aa 100644 --- a/src/backends/MemCopyWorkload.hpp +++ b/src/backends/MemCopyWorkload.hpp @@ -5,7 +5,7 @@ #pragma once #include "CpuTensorHandleFwd.hpp" -#include "backends/Workload.hpp" +#include "Workload.hpp" #include "WorkloadUtils.hpp" #include diff --git a/src/backends/OutputHandler.cpp b/src/backends/OutputHandler.cpp index 4dfa1a621e..15e90c7ba8 100644 --- a/src/backends/OutputHandler.cpp +++ b/src/backends/OutputHandler.cpp @@ -7,9 +7,9 @@ #include #include -#include "backends/WorkloadFactory.hpp" -#include "backends/WorkloadDataCollector.hpp" -#include "backends/ITensorHandle.hpp" +#include "WorkloadFactory.hpp" +#include "WorkloadDataCollector.hpp" +#include "ITensorHandle.hpp" namespace armnn { diff --git a/src/backends/OutputHandler.hpp b/src/backends/OutputHandler.hpp index 97da87d8cc..ad4a2931cd 100644 --- a/src/backends/OutputHandler.hpp +++ b/src/backends/OutputHandler.hpp @@ -4,7 +4,7 @@ // #pragma once -#include "backends/WorkloadDataFwd.hpp" +#include #include #include @@ -14,10 +14,10 @@ #include -#include "armnn/INetwork.hpp" -#include "armnn/Types.hpp" -#include "armnn/Descriptors.hpp" -#include "armnn/Tensor.hpp" +#include +#include +#include +#include #include "ITensorHandle.hpp" namespace armnn diff --git a/src/backends/WorkloadFactory.cpp b/src/backends/WorkloadFactory.cpp index aaccabd059..a70097eb82 100644 --- a/src/backends/WorkloadFactory.cpp +++ b/src/backends/WorkloadFactory.cpp @@ -5,7 +5,7 @@ #include "WorkloadFactory.hpp" #include #include -#include "ClWorkloadFactory.hpp" +#include #include 
#include diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt new file mode 100644 index 0000000000..80ca0acc08 --- /dev/null +++ b/src/backends/cl/CMakeLists.txt @@ -0,0 +1,22 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +list(APPEND armnnClBackend_sources + ClContextControl.cpp + ClContextControl.hpp + ClLayerSupport.cpp + ClLayerSupport.hpp + ClWorkloadFactory.cpp + ClWorkloadFactory.hpp +) + +if(ARMCOMPUTECL) + add_subdirectory(workloads test) +endif() + +add_library(armnnClBackend STATIC ${armnnClBackend_sources}) +target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/cl/ClContextControl.cpp b/src/backends/cl/ClContextControl.cpp new file mode 100644 index 0000000000..e8b21c942d --- /dev/null +++ b/src/backends/cl/ClContextControl.cpp @@ -0,0 +1,235 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClContextControl.hpp" + +#include "armnn/Exceptions.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include +#include +#endif + +#include +#include +#include +#include +#include + +#include "LeakChecking.hpp" + +namespace cl +{ +class Context; +class CommandQueue; +class Device; +} + +namespace armnn +{ + +ClContextControl::ClContextControl(IGpuAccTunedParameters* clTunedParameters, + bool profilingEnabled) + : m_clTunedParameters(boost::polymorphic_downcast(clTunedParameters)) + , m_ProfilingEnabled(profilingEnabled) +{ + // Ignore m_ProfilingEnabled if unused to avoid compiling problems when ArmCompute is disabled. + boost::ignore_unused(m_ProfilingEnabled); + +#ifdef ARMCOMPUTECL_ENABLED + try + { + std::vector platforms; + cl::Platform::get(&platforms); + + // Selects default platform for the first element. + cl::Platform::setDefault(platforms[0]); + + std::vector devices; + platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); + + // Selects default device for the first element. + cl::Device::setDefault(devices[0]); + } + catch (const cl::Error& clError) + { + throw ClRuntimeUnavailableException(boost::str(boost::format( + "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" + ) % clError.what() % clError.err())); + } + + // Removes the use of global CL context. + cl::Context::setDefault(cl::Context{}); + BOOST_ASSERT(cl::Context::getDefault()() == NULL); + + // Removes the use of global CL command queue. + cl::CommandQueue::setDefault(cl::CommandQueue{}); + BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL); + + // Always load the OpenCL runtime. + LoadOpenClRuntime(); +#endif +} + +ClContextControl::~ClContextControl() +{ +#ifdef ARMCOMPUTECL_ENABLED + // Load the OpencCL runtime without the tuned parameters to free the memory for them. + try + { + UnloadOpenClRuntime(); + } + catch (const cl::Error& clError) + { + // This should not happen, it is ignored if it does. + + // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an + // exception of type std::length_error. + // Using stderr instead in this context as there is no point in nesting try-catch blocks here. + std::cerr << "A CL error occurred unloading the runtime tuner parameters: " + << clError.what() << ". 
CL error code is: " << clError.err() << std::endl; + } +#endif +} + +void ClContextControl::LoadOpenClRuntime() +{ + DoLoadOpenClRuntime(true); +} + +void ClContextControl::UnloadOpenClRuntime() +{ + DoLoadOpenClRuntime(false); +} + +void ClContextControl::DoLoadOpenClRuntime(bool useTunedParameters) +{ +#ifdef ARMCOMPUTECL_ENABLED + cl::Device device = cl::Device::getDefault(); + cl::Context context; + cl::CommandQueue commandQueue; + + if (arm_compute::CLScheduler::get().context()() != NULL) + { + // Wait for all queued CL requests to finish before reinitialising it. + arm_compute::CLScheduler::get().sync(); + } + + try + { + arm_compute::CLKernelLibrary::get().clear_programs_cache(); + // Initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are no + // context references); it is initialised again, with a proper context, later. + arm_compute::CLScheduler::get().init(context, commandQueue, device); + arm_compute::CLKernelLibrary::get().init(".", context, device); + + { + // + // Here we replace the context with a new one in which + // the memory leak checks show it as an extra allocation but + // because of the scope of the leak checks, it doesn't count + // the disposal of the original object. On the other hand it + // does count the creation of this context which it flags + // as a memory leak. By adding the following line we prevent + // this to happen. + // + ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE(); + context = cl::Context(device); + } + + // NOTE: In this specific case profiling has to be enabled on the command queue + // in order for the CLTuner to work. + bool profilingNeededForClTuner = useTunedParameters && m_clTunedParameters && + m_clTunedParameters->m_Mode == IGpuAccTunedParameters::Mode::UpdateTunedParameters; + + if (m_ProfilingEnabled || profilingNeededForClTuner) + { + // Create a new queue with profiling enabled. + commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); + } + else + { + // Use default queue. + commandQueue = cl::CommandQueue(context, device); + } + } + catch (const cl::Error& clError) + { + throw ClRuntimeUnavailableException(boost::str(boost::format( + "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%" + ) % clError.what() % clError.err())); + } + + // Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute. 
+ arm_compute::CLKernelLibrary::get().init(".", context, device); + + arm_compute::ICLTuner* tuner = nullptr; + if (useTunedParameters && m_clTunedParameters) + { + tuner = &m_clTunedParameters->m_Tuner; + } + arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner); +#endif +} + +void ClContextControl::ClearClCache() +{ + DoLoadOpenClRuntime(true); +} + +armnn::IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(armnn::IGpuAccTunedParameters::Mode mode) +{ + return new ClTunedParameters(mode); +} + +armnn::IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode mode) +{ + return IGpuAccTunedParametersPtr(CreateRaw(mode), &IGpuAccTunedParameters::Destroy); +} + +void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params) +{ + delete params; +} + +ClTunedParameters::ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode) + : m_Mode(mode) +#ifdef ARMCOMPUTECL_ENABLED + , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) +#endif +{ +} + +void ClTunedParameters::Load(const char* filename) +{ +#ifdef ARMCOMPUTECL_ENABLED + try + { + m_Tuner.load_from_file(filename); + } + catch (const std::exception& e) + { + throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + + e.what()); + } +#endif +} + +void ClTunedParameters::Save(const char* filename) const +{ +#ifdef ARMCOMPUTECL_ENABLED + try + { + m_Tuner.save_to_file(filename); + } + catch (const std::exception& e) + { + throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + + e.what()); + } +#endif +} + +} // namespace armnn diff --git a/src/backends/cl/ClContextControl.hpp b/src/backends/cl/ClContextControl.hpp new file mode 100644 index 0000000000..5ac56423bd --- /dev/null +++ b/src/backends/cl/ClContextControl.hpp @@ -0,0 +1,62 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "armnn/IRuntime.hpp" + +#ifdef ARMCOMPUTECL_ENABLED +#include +#endif + +namespace armnn +{ + +class IGpuAccTunedParameters; +class ClTunedParameters; + +// ARM Compute OpenCL context control. +class ClContextControl +{ +public: + + ClContextControl(IGpuAccTunedParameters* clTunedParameters = nullptr, + bool profilingEnabled = false); + + virtual ~ClContextControl(); + + void LoadOpenClRuntime(); + + // Users should call this (after freeing all of the cl::Context objects they use) + // to release the cached memory used by the compute library. + void UnloadOpenClRuntime(); + + // Clear the CL cache, without losing the tuned parameter settings. + void ClearClCache(); + +private: + + void DoLoadOpenClRuntime(bool useTunedParameters); + + ClTunedParameters* m_clTunedParameters; + + bool m_ProfilingEnabled; +}; + +class ClTunedParameters : public IGpuAccTunedParameters +{ +public: + ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode); + + virtual void Load(const char* filename); + virtual void Save(const char* filename) const; + + Mode m_Mode; + +#ifdef ARMCOMPUTECL_ENABLED + arm_compute::CLTuner m_Tuner; +#endif +}; + +} // namespace armnn diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp new file mode 100644 index 0000000000..6c1940b02f --- /dev/null +++ b/src/backends/cl/ClLayerSupport.cpp @@ -0,0 +1,478 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "LayerSupportCommon.hpp" + +#include "ClLayerSupport.hpp" +#include "InternalTypes.hpp" +#include +#include +#include + +#include + +#ifdef ARMCOMPUTECL_ENABLED +#include "workloads/ClAdditionWorkload.hpp" +#include "workloads/ClActivationFloatWorkload.hpp" +#include "workloads/ClBatchNormalizationFloatWorkload.hpp" +#include "workloads/ClConvertFp16ToFp32Workload.hpp" +#include "workloads/ClConvertFp32ToFp16Workload.hpp" +#include "workloads/ClConvolution2dBaseWorkload.hpp" +#include "workloads/ClDepthwiseConvolutionBaseWorkload.hpp" +#include "workloads/ClDivisionFloatWorkload.hpp" +#include "workloads/ClL2NormalizationFloatWorkload.hpp" +#include "workloads/ClMultiplicationFloatWorkload.hpp" +#include "workloads/ClFullyConnectedWorkload.hpp" +#include "workloads/ClPadWorkload.hpp" +#include "workloads/ClPooling2dBaseWorkload.hpp" +#include "workloads/ClPermuteWorkload.hpp" +#include "workloads/ClNormalizationFloatWorkload.hpp" +#include "workloads/ClSoftmaxBaseWorkload.hpp" +#include "workloads/ClSubtractionWorkload.hpp" +#include "workloads/ClLstmFloatWorkload.hpp" +#endif + +using namespace boost; + +namespace armnn +{ +namespace +{ +template +bool IsMatchingSize2d(const TensorInfo& weightInfo) +{ + // Width & Height must match. + return (weightInfo.GetShape()[3] == FilterSize) && (weightInfo.GetShape()[2] == FilterSize); +} + +template +bool IsMatchingStride(uint32_t actualStride) +{ + return ValidStride == actualStride; +} + +template +bool IsMatchingStride(uint32_t actualStride) +{ + return IsMatchingStride(actualStride) || IsMatchingStride(actualStride); +}; + +bool IsClBackendSupported(std::string* reasonIfUnsupported) +{ +#if ARMCOMPUTECL_ENABLED + return true; +#else + if (reasonIfUnsupported != nullptr) + { + *reasonIfUnsupported = "The armnn library has been built without CL support"; + } + return false; +#endif +} + +#if ARMCOMPUTECL_ENABLED +#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) (expr) +#else +#define FORWARD_CL_LAYER_SUPPORT_FUNC(expr) IsClBackendSupported(reasonIfUnsupported) +#endif + +#if ARMCOMPUTECL_ENABLED +template +inline bool IsWorkloadSupported(FuncType&& func, std::string* reasonIfUnsupported, Args&&... args) +{ + arm_compute::Status aclStatus = func(std::forward(args)...); + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + return supported; +} + +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); +#else +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsClBackendSupported(reasonIfUnsupported); +#endif + +} //namespace + +template +bool IsSupportedForDataTypeCl(std::string* reasonIfUnsupported, + DataType dataType, + FloatFunc floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... 
params) +{ + return IsClBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + floatFuncPtr, + floatFuncPtr, + uint8FuncPtr, + std::forward(params)...); +} + +bool IsActivationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClActivationWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + +bool IsAdditionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return FORWARD_CL_LAYER_SUPPORT_FUNC(ClAdditionValidate(input0, + input1, + output, + reasonIfUnsupported)); +} + +bool IsBatchNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchNormalizationValidate, + reasonIfUnsupported, + input, + output, + mean, + var, + beta, + gamma, + descriptor); +} + +bool IsConstantSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) +{ + bool isSupported = false; + + bool strideXIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideX); + bool strideXIsThree = IsMatchingStride<3>(desc.m_StrideX); + + bool strideYIsOneOrTwo = IsMatchingStride<1, 2>(desc.m_StrideY); + bool strideYIsThree = IsMatchingStride<3>(desc.m_StrideY); + + bool strideIsOneOrTwo = strideXIsOneOrTwo && strideYIsOneOrTwo; + bool strideIsOneOrTwoOrThree = ( strideXIsOneOrTwo || strideXIsThree ) && ( strideYIsOneOrTwo || strideYIsThree ); + + // 1x1 convolution with strides of 1,2,3. + isSupported |= IsMatchingSize2d<1>(weightInfo) && ( strideIsOneOrTwoOrThree ); + + // 3x3 convolution with strides of 1,2. + isSupported |= IsMatchingSize2d<3>(weightInfo) && ( strideIsOneOrTwo ); + + // 5x5 convolution with strides of 1,2 + isSupported |= IsMatchingSize2d<5>(weightInfo) && ( strideIsOneOrTwo ); + + //Fall back to normal convolution for the asymmetric padding case. + if (desc.m_PadLeft != desc.m_PadRight || + desc.m_PadTop != desc.m_PadBottom) + { + //Direct convolution does not support asymmetric padding yet. 
+ isSupported = false; + } + + return isSupported; +} + +bool IsDirectConvolution2dParamsSupportedCl(std::string* reasonIfUnsupported, + const Convolution2dDescriptor& parameters, + const TensorInfo& weightInfo) +{ + return IsClDirectConvolution2dSupported(weightInfo, parameters); +} + +bool IsConvolution2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvolution2dWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClDepthwiseConvolutionWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDivisionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsSubtractionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return FORWARD_CL_LAYER_SUPPORT_FUNC(ClSubtractionValidate(input0, + input1, + output, + reasonIfUnsupported)); +} + +bool IsFullyConnectedSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClFullyConnectedWorkloadValidate, + reasonIfUnsupported, + input, + output, + weights, + biases, + descriptor); +} + +bool IsInputSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const L2NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsMergerSupportedCl(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsMultiplicationSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsOutputSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool 
IsPadSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PadDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return FORWARD_CL_LAYER_SUPPORT_FUNC(ClPadValidate(input, output, descriptor, reasonIfUnsupported)); +} + +bool IsPermuteSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(output); + FORWARD_WORKLOAD_VALIDATE_FUNC(ClPermuteWorkloadValidate, reasonIfUnsupported, descriptor); +} + +bool IsPooling2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsResizeBilinearSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsSoftmaxSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + FORWARD_WORKLOAD_VALIDATE_FUNC(ClSoftmaxWorkloadValidate, reasonIfUnsupported, input, output); +} + +bool IsSplitterSupportedCl(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeCl(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedCl(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(descriptor); + return false; +} + +bool IsReshapeSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + return true; +} + +bool IsFloorSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsClBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &FalseFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClLstmFloatWorkloadValidate, reasonIfUnsupported, + input, outputStateIn, cellStateIn, scratchBuffer, outputStateOut, cellStateOut, + output, descriptor, inputToForgetWeights, inputToCellWeights, + inputToOutputWeights, 
recurrentToForgetWeights, + recurrentToCellWeights, recurrentToOutputWeights, + forgetGateBias, cellBias, outputGateBias, + inputToInputWeights, recurrentToInputWeights, + cellToInputWeights, inputGateBias, projectionWeights, + projectionBias, cellToForgetWeights, cellToOutputWeights); +} + +bool IsConvertFp16ToFp32SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp16ToFp32WorkloadValidate, + reasonIfUnsupported, + input, + output, + reasonIfUnsupported); +} + +bool IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvertFp32ToFp16WorkloadValidate, + reasonIfUnsupported, + input, + output, + reasonIfUnsupported); +} + +bool IsMeanSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return false; +} + +} diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp new file mode 100644 index 0000000000..700d71801d --- /dev/null +++ b/src/backends/cl/ClLayerSupport.hpp @@ -0,0 +1,164 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include +#include + +#include + +namespace armnn +{ +bool IsClDirectConvolution2dSupported(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc); +bool IsClDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, + const DepthwiseConvolution2dDescriptor& parameters, + const TensorInfo& weights); + +bool IsActivationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsAdditionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsBatchNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDivisionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsSubtractionSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = 
nullptr); + +bool IsL2NormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const L2NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedCl(const std::vector inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedCl(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedCl(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPadSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PadDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPooling2dSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedCl(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedCl(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedCl(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsMeanSupportedCl(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp16ToFp32SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp32ToFp16SupportedCl(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +} diff --git 
a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp new file mode 100644 index 0000000000..556e4479b6 --- /dev/null +++ b/src/backends/cl/ClTensorHandle.hpp @@ -0,0 +1,141 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace armnn +{ + + +class IClTensorHandle : public ITensorHandle +{ +public: + virtual arm_compute::ICLTensor& GetTensor() = 0; + virtual arm_compute::ICLTensor const& GetTensor() const = 0; + virtual arm_compute::DataType GetDataType() const = 0; + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) = 0; +}; + +class ClTensorHandle : public IClTensorHandle +{ +public: + ClTensorHandle(const TensorInfo& tensorInfo) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); + } + + ClTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout); + } + + arm_compute::CLTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; } + virtual void Allocate() override {armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);} + + virtual void Manage() override + { + assert(m_MemoryGroup != nullptr); + m_MemoryGroup->manage(&m_Tensor); + } + + virtual const void* Map(bool blocking = true) const override + { + const_cast(&m_Tensor)->map(blocking); + return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override { const_cast(&m_Tensor)->unmap(); } + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) override + { + m_MemoryGroup = boost::polymorphic_pointer_downcast(memoryGroup); + } + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } +private: + arm_compute::CLTensor m_Tensor; + std::shared_ptr m_MemoryGroup; +}; + +class ClSubTensorHandle : public IClTensorHandle +{ +public: + ClSubTensorHandle(IClTensorHandle* parent, + const arm_compute::TensorShape& shape, + const arm_compute::Coordinates& coords) + : m_Tensor(&parent->GetTensor(), shape, coords) + { + parentHandle = parent; + } + + arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual const void* Map(bool blocking = true) const override + { + const_cast(&m_Tensor)->map(blocking); + return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override { const_cast(&m_Tensor)->unmap(); } + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::CL; } + + virtual ITensorHandle* GetParent() const override { return parentHandle; } + + virtual arm_compute::DataType GetDataType() const override + { + return 
m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr&) override {} + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + +private: + mutable arm_compute::CLSubTensor m_Tensor; + ITensorHandle* parentHandle = nullptr; + +}; + +} diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp new file mode 100644 index 0000000000..46a96559bf --- /dev/null +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -0,0 +1,506 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "ClWorkloadFactory.hpp" + +#include +#include + +#include +#include +#include + +#ifdef ARMCOMPUTECL_ENABLED +#include +#include +#include + +#include + +#include +#include + +#include +#endif + +#include + +#include +#include +#include + +namespace armnn +{ + +bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, + boost::optional dataType, + std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::GpuAcc, layer, dataType, outReasonIfUnsupported); +} + +#ifdef ARMCOMPUTECL_ENABLED + +ClWorkloadFactory::ClWorkloadFactory() +: m_MemoryManager(std::make_unique()) +{ +} + +std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + std::unique_ptr tensorHandle = std::make_unique(tensorInfo); + tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); + + return tensorHandle; +} + +std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const +{ + std::unique_ptr tensorHandle = std::make_unique(tensorInfo, dataLayout); + tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); + + return tensorHandle; +} + +std::unique_ptr ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + BOOST_ASSERT(parent.GetType() == ITensorHandle::CL); + + arm_compute::Coordinates coords; + arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); + + coords.set_num_dimensions(subTensorShape.GetNumDimensions()); + for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) + { + // Arm compute indexes tensor coords in reverse order. 
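+        // For example, with a 3D sub-tensor an ArmNN origin of {o0, o1, o2} becomes the ACL
+        // coordinate set {o2, o1, o0}: i == 0 reads subTensorOrigin[2], i == 1 reads
+        // subTensorOrigin[1], and i == 2 reads subTensorOrigin[0].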
+ unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; + coords.set(i, boost::numeric_cast(subTensorOrigin[revertedIndex])); + } + + return std::make_unique( + boost::polymorphic_downcast(&parent), shape, coords); +} + +std::unique_ptr ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr ClWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload, + ClAdditionWorkload>(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload, + ClSubtractionWorkload>(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr 
ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) + { + throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload"); + } + + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateResizeBilinear( + const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload, + ClPadWorkload>(descriptor, info); +} + +void ClWorkloadFactory::Finalize() +{ + m_MemoryManager.Finalize(); +} + +void ClWorkloadFactory::Release() +{ + m_MemoryManager.Release(); +} + +void ClWorkloadFactory::Acquire() +{ + m_MemoryManager.Acquire(); +} + +#else // #if ARMCOMPUTECL_ENABLED + +ClWorkloadFactory::ClWorkloadFactory() +{ +} + +std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + 
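+// Note on the factory methods in the ARMCOMPUTECL_ENABLED branch above: each
+// MakeWorkload<FloatWorkloadType, Uint8WorkloadType>(...) call selects the workload class from
+// the layer's tensor data type. A minimal sketch of that dispatch, assuming the helper's shape
+// rather than quoting the real MakeWorkload helper implementation:
+//
+//     template <typename FloatWorkload, typename Uint8Workload, typename QueueDescriptor>
+//     std::unique_ptr<IWorkload> MakeWorkloadSketch(const QueueDescriptor& descriptor,
+//                                                   const WorkloadInfo& info)
+//     {
+//         switch (info.m_InputTensorInfos[0].GetDataType())
+//         {
+//             case DataType::Float16:
+//             case DataType::Float32:
+//                 return std::make_unique<FloatWorkload>(descriptor, info);
+//             case DataType::QuantisedAsymm8:
+//                 return std::make_unique<Uint8Workload>(descriptor, info);
+//             default:
+//                 throw InvalidArgumentException("Unsupported data type for CL workload");
+//         }
+//     }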
+std::unique_ptr ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + 
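+// In this stub branch (built when ARMCOMPUTECL_ENABLED is not defined) every factory method
+// returns nullptr, which keeps ClWorkloadFactory linkable without the Compute Library. Callers
+// therefore have to treat a null workload as "CL backend unavailable"; an illustrative,
+// hypothetical guard on the calling side:
+//
+//     auto workload = clFactory.CreateSoftmax(softmaxDescriptor, workloadInfo);
+//     if (workload == nullptr)
+//     {
+//         // Fall back to another backend (e.g. CpuRef) or fail the network load.
+//         throw UnimplementedException("GpuAcc support was not compiled in");
+//     }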
+std::unique_ptr ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +void ClWorkloadFactory::Finalize() +{ +} + +void ClWorkloadFactory::Release() +{ +} + +void ClWorkloadFactory::Acquire() +{ +} + +#endif // #if ARMCOMPUTECL_ENABLED + +} // namespace armnn diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp new file mode 100644 index 0000000000..59ae3b343a --- /dev/null +++ b/src/backends/cl/ClWorkloadFactory.hpp @@ -0,0 +1,139 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include +#include + +#include "memory/BaseMemoryManager.hpp" + +namespace armnn +{ + +// ARM Compute OpenCL workload factory. +class ClWorkloadFactory : public IWorkloadFactory +{ +public: + ClWorkloadFactory(); + + virtual Compute GetCompute() const override { return Compute::GpuAcc; } + + static bool IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const override { return true; } + + virtual std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override; + + virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const override; + + virtual std::unique_ptr CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateNormalization(const NormalizationQueueDescriptor& descriptor, + 
const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& Info) const override; + + virtual std::unique_ptr CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual void Finalize() override; + + virtual void Release() override; + + virtual void Acquire() override; + +private: + +#ifdef ARMCOMPUTECL_ENABLED + mutable ClMemoryManager m_MemoryManager; +#endif +}; + +} // namespace armnn diff --git a/src/backends/cl/backend.cmake b/src/backends/cl/backend.cmake new file mode 100644 index 0000000000..1af88e3c9b --- /dev/null +++ b/src/backends/cl/backend.cmake @@ -0,0 +1,13 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +if(ARMCOMPUTECL) + add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/cl) + list(APPEND armnnLibraries armnnClBackend armnnClBackendWorkloads) +else() + message("CL backend is disabled") + add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/cl) + list(APPEND armnnLibraries armnnClBackend) +endif() diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk new file mode 100644 index 0000000000..2418a24249 --- /dev/null +++ b/src/backends/cl/backend.mk @@ -0,0 +1,51 @@ +# +# Copyright © 2017 ARM Ltd. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +# BACKEND_SOURCES contains the list of files to be included +# in the Android build and it is picked up by the Android.mk +# file in the root of ArmNN + +BACKEND_SOURCES := \ + ClContextControl.cpp \ + ClLayerSupport.cpp \ + ClWorkloadFactory.cpp \ + workloads/ClActivationFloatWorkload.cpp \ + workloads/ClActivationUint8Workload.cpp \ + workloads/ClAdditionWorkload.cpp \ + workloads/ClBaseConstantWorkload.cpp \ + workloads/ClBatchNormalizationFloatWorkload.cpp \ + workloads/ClConstantFloatWorkload.cpp \ + workloads/ClConstantUint8Workload.cpp \ + workloads/ClConvertFp16ToFp32Workload.cpp \ + workloads/ClConvertFp32ToFp16Workload.cpp \ + workloads/ClConvolution2dBaseWorkload.cpp \ + workloads/ClConvolution2dFloatWorkload.cpp \ + workloads/ClConvolution2dUint8Workload.cpp \ + workloads/ClDepthwiseConvolutionBaseWorkload.cpp \ + workloads/ClDepthwiseConvolutionFloatWorkload.cpp \ + workloads/ClDepthwiseConvolutionUint8Workload.cpp \ + workloads/ClDivisionFloatWorkload.cpp \ + workloads/ClFloorFloatWorkload.cpp \ + workloads/ClFullyConnectedWorkload.cpp \ + workloads/ClL2NormalizationFloatWorkload.cpp \ + workloads/ClLstmFloatWorkload.cpp \ + workloads/ClMergerFloatWorkload.cpp \ + workloads/ClMergerUint8Workload.cpp \ + workloads/ClMultiplicationFloatWorkload.cpp \ + workloads/ClNormalizationFloatWorkload.cpp \ + workloads/ClPadWorkload.cpp \ + workloads/ClPermuteWorkload.cpp \ + workloads/ClPooling2dBaseWorkload.cpp \ + workloads/ClPooling2dFloatWorkload.cpp \ + workloads/ClPooling2dUint8Workload.cpp \ + workloads/ClReshapeFloatWorkload.cpp \ + workloads/ClReshapeUint8Workload.cpp \ + workloads/ClResizeBilinearFloatWorkload.cpp \ + workloads/ClSoftmaxBaseWorkload.cpp \ + workloads/ClSoftmaxFloatWorkload.cpp \ + workloads/ClSoftmaxUint8Workload.cpp \ + workloads/ClSplitterFloatWorkload.cpp \ + workloads/ClSplitterUint8Workload.cpp \ + workloads/ClSubtractionWorkload.cpp diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt new file mode 100644 index 0000000000..066c37f083 --- /dev/null +++ b/src/backends/cl/workloads/CMakeLists.txt @@ -0,0 +1,92 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +list(APPEND armnnClBackendWorkloads_sources + ClActivationFloatWorkload.cpp + ClActivationFloatWorkload.hpp + ClActivationUint8Workload.cpp + ClActivationUint8Workload.hpp + ClAdditionWorkload.cpp + ClAdditionWorkload.hpp + ClBaseConstantWorkload.cpp + ClBaseConstantWorkload.hpp + ClBaseMergerWorkload.hpp + ClBaseSplitterWorkload.hpp + ClBatchNormalizationFloatWorkload.cpp + ClBatchNormalizationFloatWorkload.hpp + ClConstantFloatWorkload.cpp + ClConstantFloatWorkload.hpp + ClConstantUint8Workload.cpp + ClConstantUint8Workload.hpp + ClConvertFp16ToFp32Workload.cpp + ClConvertFp16ToFp32Workload.hpp + ClConvertFp32ToFp16Workload.cpp + ClConvertFp32ToFp16Workload.hpp + ClConvolution2dBaseWorkload.cpp + ClConvolution2dBaseWorkload.hpp + ClConvolution2dFloatWorkload.cpp + ClConvolution2dFloatWorkload.hpp + ClConvolution2dUint8Workload.cpp + ClConvolution2dUint8Workload.hpp + ClDepthwiseConvolutionBaseWorkload.cpp + ClDepthwiseConvolutionBaseWorkload.hpp + ClDepthwiseConvolutionFloatWorkload.cpp + ClDepthwiseConvolutionFloatWorkload.hpp + ClDepthwiseConvolutionUint8Workload.cpp + ClDepthwiseConvolutionUint8Workload.hpp + ClDivisionFloatWorkload.cpp + ClDivisionFloatWorkload.hpp + ClFloorFloatWorkload.cpp + ClFloorFloatWorkload.hpp + ClFullyConnectedWorkload.cpp + ClFullyConnectedWorkload.hpp + ClL2NormalizationFloatWorkload.cpp + ClL2NormalizationFloatWorkload.hpp + ClLstmFloatWorkload.cpp + ClLstmFloatWorkload.hpp + ClMergerFloatWorkload.cpp + ClMergerFloatWorkload.hpp + ClMergerUint8Workload.cpp + ClMergerUint8Workload.hpp + ClMultiplicationFloatWorkload.cpp + ClMultiplicationFloatWorkload.hpp + ClNormalizationFloatWorkload.cpp + ClNormalizationFloatWorkload.hpp + ClPadWorkload.cpp + ClPadWorkload.hpp + ClPermuteWorkload.cpp + ClPermuteWorkload.hpp + ClPooling2dBaseWorkload.cpp + ClPooling2dBaseWorkload.hpp + ClPooling2dFloatWorkload.cpp + ClPooling2dFloatWorkload.hpp + ClPooling2dUint8Workload.cpp + ClPooling2dUint8Workload.hpp + ClReshapeFloatWorkload.cpp + ClReshapeFloatWorkload.hpp + ClReshapeUint8Workload.cpp + ClReshapeUint8Workload.hpp + ClResizeBilinearFloatWorkload.cpp + ClResizeBilinearFloatWorkload.hpp + ClSoftmaxBaseWorkload.cpp + ClSoftmaxBaseWorkload.hpp + ClSoftmaxFloatWorkload.cpp + ClSoftmaxFloatWorkload.hpp + ClSoftmaxUint8Workload.cpp + ClSoftmaxUint8Workload.hpp + ClSplitterFloatWorkload.cpp + ClSplitterFloatWorkload.hpp + ClSplitterUint8Workload.cpp + ClSplitterUint8Workload.hpp + ClSubtractionWorkload.cpp + ClSubtractionWorkload.hpp + ClWorkloads.hpp + ClWorkloadUtils.hpp +) + +add_library(armnnClBackendWorkloads STATIC ${armnnClBackendWorkloads_sources}) +target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/cl/workloads/ClActivationFloatWorkload.cpp b/src/backends/cl/workloads/ClActivationFloatWorkload.cpp new file mode 100644 index 0000000000..cbaac9d226 --- /dev/null +++ b/src/backends/cl/workloads/ClActivationFloatWorkload.cpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClActivationFloatWorkload.hpp" +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); + + if (input.GetDataType() == DataType::QuantisedAsymm8 && + activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "CL: Logistic Activations unsupported with QAsymm8 data type."}; + } + + return arm_compute::CLActivationLayer::validate(&aclInput, + &aclOutput, + activationLayerInfo); +} + +ClActivationFloatWorkload::ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClActivationFloatWorkload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void ClActivationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationFloatWorkload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClActivationFloatWorkload.hpp b/src/backends/cl/workloads/ClActivationFloatWorkload.hpp new file mode 100644 index 0000000000..cb560a791b --- /dev/null +++ b/src/backends/cl/workloads/ClActivationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ +arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor); + +// Activation layer execution. +class ClActivationFloatWorkload : public FloatWorkload +{ +public: + ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLActivationLayer m_ActivationLayer; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClActivationUint8Workload.cpp b/src/backends/cl/workloads/ClActivationUint8Workload.cpp new file mode 100644 index 0000000000..ad6b73074b --- /dev/null +++ b/src/backends/cl/workloads/ClActivationUint8Workload.cpp @@ -0,0 +1,44 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClActivationUint8Workload.hpp"
+#include
+
+#include
+#include
+#include
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor,
+                                                     const WorkloadInfo& info)
+    : Uint8Workload(descriptor, info)
+{
+    auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function);
+    arm_compute::ActivationLayerInfo layerInfo(activation,
+                                               m_Data.m_Parameters.m_A,
+                                               m_Data.m_Parameters.m_B);
+
+    m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_ActivationLayer.configure(&input, &output, layerInfo);
+}
+
+void ClActivationUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationUint8Workload_Execute");
+
+    m_ActivationLayer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/backends/cl/workloads/ClActivationUint8Workload.hpp b/src/backends/cl/workloads/ClActivationUint8Workload.hpp
new file mode 100644
index 0000000000..d0b7d3a78f
--- /dev/null
+++ b/src/backends/cl/workloads/ClActivationUint8Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include
+
+#include
+
+namespace armnn
+{
+
+// Activation layer execution.
+class ClActivationUint8Workload : public Uint8Workload
+{
+public:
+    ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLActivationLayer m_ActivationLayer;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
new file mode 100644
index 0000000000..aa032e872c
--- /dev/null
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#include "ClAdditionWorkload.hpp" + +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +template +ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(this->m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); +} + +template +void ClAdditionWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute"); + m_Layer.run(); +} + +bool ClAdditionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info, + &aclInput1Info, + &aclOutputInfo, + g_AclConvertPolicy); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return supported; +} + +} //namespace armnn + +template class armnn::ClAdditionWorkload; +template class armnn::ClAdditionWorkload; diff --git a/src/backends/cl/workloads/ClAdditionWorkload.hpp b/src/backends/cl/workloads/ClAdditionWorkload.hpp new file mode 100644 index 0000000000..3e4ee26793 --- /dev/null +++ b/src/backends/cl/workloads/ClAdditionWorkload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +template +class ClAdditionWorkload : public TypedWorkload +{ +public: + ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLArithmeticAddition m_Layer; +}; + +bool ClAdditionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported); +} //namespace armnn diff --git a/src/backends/cl/workloads/ClBaseConstantWorkload.cpp b/src/backends/cl/workloads/ClBaseConstantWorkload.cpp new file mode 100644 index 0000000000..2557020b59 --- /dev/null +++ b/src/backends/cl/workloads/ClBaseConstantWorkload.cpp @@ -0,0 +1,64 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClBaseConstantWorkload.hpp" +#include +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +template class ClBaseConstantWorkload; +template class ClBaseConstantWorkload; + +template +void ClBaseConstantWorkload::Execute() const +{ + // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data + // on the first inference, then reused for subsequent inferences. 
+    // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not
+    // have been configured at the time.
+    if (!m_RanOnce)
+    {
+        const ConstantQueueDescriptor& data = this->m_Data;
+
+        BOOST_ASSERT(data.m_LayerOutput != nullptr);
+        arm_compute::CLTensor& output = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor();
+        arm_compute::DataType computeDataType = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetDataType();
+
+        switch (computeDataType)
+        {
+            case arm_compute::DataType::F16:
+            {
+                CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<Half>());
+                break;
+            }
+            case arm_compute::DataType::F32:
+            {
+                CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<float>());
+                break;
+            }
+            case arm_compute::DataType::QASYMM8:
+            {
+                CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<uint8_t>());
+                break;
+            }
+            default:
+            {
+                BOOST_ASSERT_MSG(false, "Unknown data type");
+                break;
+            }
+        }
+
+        m_RanOnce = true;
+    }
+}
+
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBaseConstantWorkload.hpp b/src/backends/cl/workloads/ClBaseConstantWorkload.hpp
new file mode 100644
index 0000000000..f7a23a9162
--- /dev/null
+++ b/src/backends/cl/workloads/ClBaseConstantWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include
+
+#include
+
+namespace armnn
+{
+template
+class ClBaseConstantWorkload : public TypedWorkload
+{
+public:
+    ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
+        : TypedWorkload(descriptor, info)
+        , m_RanOnce(false)
+    {
+    }
+
+    void Execute() const override;
+
+private:
+    mutable bool m_RanOnce;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBaseMergerWorkload.hpp b/src/backends/cl/workloads/ClBaseMergerWorkload.hpp
new file mode 100644
index 0000000000..f8ff6f9379
--- /dev/null
+++ b/src/backends/cl/workloads/ClBaseMergerWorkload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include
+
+#include
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Merger layer common to all data types.
+template
+class ClBaseMergerWorkload : public TypedWorkload
+{
+public:
+    using TypedWorkload::TypedWorkload;
+
+    void Execute() const override
+    {
+        // With subtensors, merger is a no-op.
+    }
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBaseSplitterWorkload.hpp b/src/backends/cl/workloads/ClBaseSplitterWorkload.hpp
new file mode 100644
index 0000000000..7fdcc84235
--- /dev/null
+++ b/src/backends/cl/workloads/ClBaseSplitterWorkload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include
+
+#include
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Splitter layer common to all data types.
+template
+class ClBaseSplitterWorkload : public TypedWorkload
+{
+public:
+    using TypedWorkload::TypedWorkload;
+
+    void Execute() const override
+    {
+        // With subtensors, splitter is a no-op.
+    }
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
new file mode 100644
index 0000000000..5bff7a63c9
--- /dev/null
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2017 Arm Ltd.
All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClBatchNormalizationFloatWorkload.hpp" +#include +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor &desc) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); + const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); + const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); + const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); + + return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo, + &aclOutputInfo, + &aclMeanInfo, + &aclVarInfo, + &aclBetaInfo, + &aclGammaInfo, + desc.m_Eps); +} + +ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Mean = std::make_unique(); + BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); + + m_Variance = std::make_unique(); + BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); + + m_Gamma = std::make_unique(); + BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); + + m_Beta = std::make_unique(); + BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Data.ValidateInputsOutputs("ClBatchNormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, + &output, + m_Mean.get(), + m_Variance.get(), + m_Beta.get(), + m_Gamma.get(), + m_Data.m_Parameters.m_Eps); + + InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean); + InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance); + InitializeArmComputeClTensorData(*m_Beta, m_Data.m_Beta); + InitializeArmComputeClTensorData(*m_Gamma, m_Data.m_Gamma); + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_Layer.prepare(); + FreeUnusedTensors(); +} + +void ClBatchNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +void ClBatchNormalizationFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_Mean); + FreeTensorIfUnused(m_Variance); + FreeTensorIfUnused(m_Gamma); + FreeTensorIfUnused(m_Beta); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..804591c444 --- /dev/null +++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& desc); + +class ClBatchNormalizationFloatWorkload : public FloatWorkload +{ +public: + ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLBatchNormalizationLayer m_Layer; + + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Gamma; + std::unique_ptr m_Beta; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + + diff --git a/src/backends/cl/workloads/ClConstantFloatWorkload.cpp b/src/backends/cl/workloads/ClConstantFloatWorkload.cpp new file mode 100644 index 0000000000..1565047c22 --- /dev/null +++ b/src/backends/cl/workloads/ClConstantFloatWorkload.cpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConstantFloatWorkload.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClConstantFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantFloatWorkload_Execute"); + ClBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConstantFloatWorkload.hpp b/src/backends/cl/workloads/ClConstantFloatWorkload.hpp new file mode 100644 index 0000000000..0cbeaad9ea --- /dev/null +++ b/src/backends/cl/workloads/ClConstantFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseConstantWorkload.hpp" + +namespace armnn +{ +class ClConstantFloatWorkload : public ClBaseConstantWorkload +{ +public: + using ClBaseConstantWorkload::ClBaseConstantWorkload; + void Execute() const override; +}; + + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConstantUint8Workload.cpp b/src/backends/cl/workloads/ClConstantUint8Workload.cpp new file mode 100644 index 0000000000..a5ef0321cd --- /dev/null +++ b/src/backends/cl/workloads/ClConstantUint8Workload.cpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConstantUint8Workload.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantUint8Workload_Execute"); + ClBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConstantUint8Workload.hpp b/src/backends/cl/workloads/ClConstantUint8Workload.hpp new file mode 100644 index 0000000000..30556dc0d6 --- /dev/null +++ b/src/backends/cl/workloads/ClConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseConstantWorkload.hpp" + +namespace armnn +{ + +class ClConstantUint8Workload : public ClBaseConstantWorkload +{ +public: + using ClBaseConstantWorkload::ClBaseConstantWorkload; + void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..e7663b4ca4 --- /dev/null +++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvertFp16ToFp32Workload.hpp" +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload( + const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) : + Float16ToFloat32Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); +} + +void ClConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute"); + m_Layer.run(); +} + +arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + if (input.GetDataType() != DataType::Float16) + { + *reasonIfUnsupported = "Input should be Float16"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + if (output.GetDataType() != DataType::Float32) + { + *reasonIfUnsupported = "Output should be Float32"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( + &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return aclStatus; +} + + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..b6447488f7 --- /dev/null +++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input,
+                                                        const TensorInfo& output,
+                                                        std::string* reasonIfUnsupported)
+{
+    if (input.GetDataType() != DataType::Float16)
+    {
+        *reasonIfUnsupported = "Input should be Float16";
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported);
+    }
+    if (output.GetDataType() != DataType::Float32)
+    {
+        *reasonIfUnsupported = "Output should be Float32";
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported);
+    }
+
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
+        &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
+
+    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+    if (!supported && reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = aclStatus.error_description();
+    }
+
+    return aclStatus;
+}
+
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
new file mode 100644
index 0000000000..b6447488f7
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>
+{
+public:
+
+    ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::CLDepthConvertLayer m_Layer;
+};
+
+arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input,
+                                                        const TensorInfo& output,
+                                                        std::string* reasonIfUnsupported);
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
new file mode 100644
index 0000000000..2ae4adc424
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConvertFp32ToFp16Workload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
+    const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) :
+    Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info)
+{
+    this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+}
+
+void ClConvertFp32ToFp16Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute");
+    m_Layer.run();
+}
+
+arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input,
+                                                        const TensorInfo& output,
+                                                        std::string* reasonIfUnsupported)
+{
+    if (input.GetDataType() != DataType::Float32)
+    {
+        *reasonIfUnsupported = "Input should be Float32";
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported);
+    }
+    if (output.GetDataType() != DataType::Float16)
+    {
+        *reasonIfUnsupported = "Output should be Float16";
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported);
+    }
+
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
+        &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
+
+    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+    if (!supported && reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = aclStatus.error_description();
+    }
+
+    return aclStatus;
+}
+
+
+} //namespace armnn
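+
+// Both conversion workloads drive arm_compute::CLDepthConvertLayer with the same
+// SATURATE policy and zero shift; only the direction of the data type checks
+// differs between the Fp16->Fp32 and Fp32->Fp16 variants.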
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
new file mode 100644
index 0000000000..95d19905d7
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>
+{
+public:
+
+    ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::CLDepthConvertLayer m_Layer;
+};
+
+arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input,
+                                                        const TensorInfo& output,
+                                                        std::string* reasonIfUnsupported);
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp
new file mode 100644
index 0000000000..58699a8287
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp
@@ -0,0 +1,48 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConvolution2dBaseWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/ArmComputeUtils.hpp>
+#include <backends/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h>
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
+    const TensorInfo& output,
+    const Convolution2dDescriptor& descriptor,
+    const TensorInfo& weights,
+    const boost::optional<TensorInfo>& biases)
+{
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+
+    arm_compute::TensorInfo aclBiasesInfo;
+    arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+
+    if (descriptor.m_BiasEnabled)
+    {
+        BOOST_ASSERT(biases.is_initialized());
+
+        aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout);
+        optionalAclBiasesInfo = &aclBiasesInfo;
+    }
+
+    arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
+
+    return arm_compute::CLConvolutionLayer::validate(&aclInputInfo,
+                                                     &aclWeightsInfo,
+                                                     optionalAclBiasesInfo,
+                                                     &aclOutputInfo,
+                                                     layerInfo);
+}
+
+}
diff --git a/src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp
new file mode 100644
index 0000000000..a983dba79a
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+#include <armnn/Descriptors.hpp>
+
+#include <boost/optional.hpp>
+
+#include <arm_compute/core/Error.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
+    const TensorInfo& output,
+    const Convolution2dDescriptor& descriptor,
+    const TensorInfo& weights,
+    const boost::optional<TensorInfo>& biases);
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp
new file mode 100644
index 0000000000..813808345e
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp
@@ -0,0 +1,81 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dFloatWorkload.hpp" +#include +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dFloatWorkload::ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_ConvolutionLayer(memoryManager) +{ + + // todo: check tensor shapes match. + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ConvolutionLayer.configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + + InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_ConvolutionLayer.prepare(); + FreeUnusedTensors(); +} + +void ClConvolution2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dFloat32Workload_Execute"); + + m_ConvolutionLayer.run(); +} + +void ClConvolution2dFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp new file mode 100644 index 0000000000..1f9710e1ea --- /dev/null +++ b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include + +#include + +namespace armnn +{ + +class ClConvolution2dFloatWorkload : public FloatWorkload +{ +public: + ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp b/src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..d9b9dfd833 --- /dev/null +++ b/src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp @@ -0,0 +1,81 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dUint8Workload.hpp" +#include +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : Uint8Workload(descriptor, info) + , m_ConvolutionLayer(memoryManager) +{ + // todo: check tensor shapes match + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ConvolutionLayer.configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + + InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_ConvolutionLayer.prepare(); + FreeUnusedTensors(); +} + +void ClConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dUint8Workload_Execute"); + + m_ConvolutionLayer.run(); +} + +void ClConvolution2dUint8Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp b/src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..1720ec935c --- /dev/null +++ b/src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include + +#include + +namespace armnn +{ + +class ClConvolution2dUint8Workload : public Uint8Workload +{ +public: + ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp new file mode 100644 index 0000000000..5a036db922 --- /dev/null +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp @@ -0,0 +1,125 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClDepthwiseConvolutionBaseWorkload.hpp"
+
+#include "TypeUtils.hpp"
+
+#include <backends/ArmComputeTensorUtils.hpp>
+#include <backends/ArmComputeUtils.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/cl/ClTensorHandle.hpp>
+
+namespace armnn
+{
+
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+    const TensorInfo& output,
+    const DepthwiseConvolution2dDescriptor& descriptor,
+    const TensorInfo& weights,
+    const boost::optional<TensorInfo>& biases)
+{
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+
+    arm_compute::TensorInfo aclBiasesInfo;
+    arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+
+    if (descriptor.m_BiasEnabled)
+    {
+        BOOST_ASSERT(biases.is_initialized());
+
+        aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout);
+        optionalAclBiasesInfo = &aclBiasesInfo;
+    }
+
+    const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
+    const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+    return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
+                                                              &aclWeightsInfo,
+                                                              optionalAclBiasesInfo,
+                                                              &aclOutputInfo,
+                                                              aclPadStrideInfo,
+                                                              aclDepthMultiplier);
+}
+
+template<armnn::DataType... dataTypes>
+ClDepthwiseConvolutionBaseWorkload<dataTypes...>::ClDepthwiseConvolutionBaseWorkload(
+    const DepthwiseConvolution2dQueueDescriptor& descriptor,
+    const WorkloadInfo& info)
+    : TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>(descriptor, info)
+{
+    auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
+
+    m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
+    BuildArmComputeTensor(*m_KernelTensor, weightInfo);
+
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
+        BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+    }
+
+    arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+                                             m_Data.m_Parameters.m_StrideY,
+                                             m_Data.m_Parameters.m_PadLeft,
+                                             m_Data.m_Parameters.m_PadRight,
+                                             m_Data.m_Parameters.m_PadTop,
+                                             m_Data.m_Parameters.m_PadBottom,
+                                             arm_compute::DimensionRoundingType::FLOOR);
+
+    std::string name = std::string("ClDepthwiseConvolution") +
+        GetDataTypeName(m_Data.m_Weight->GetTensorInfo().GetDataType()) + "Workload";
+    m_Data.ValidateInputsOutputs(name, 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    const unsigned int depthMultiplier = weightInfo.GetShape()[0];
+
+    // Check for optimisation opportunities.
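+    // ACL ships a kernel specialised for 3x3 depthwise convolutions
+    // (CLDepthwiseConvolutionLayer3x3), which is generally faster than the
+    // generic CLDepthwiseConvolutionLayer; it is selected when the weight
+    // tensor's spatial dimensions (indices 2 and 3, assuming [M, C, H, W]
+    // layout) are both 3.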
+    bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3);
+    if (use3x3Optimisation)
+    {
+        m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
+        static_cast<arm_compute::CLDepthwiseConvolutionLayer3x3*>(m_DepthwiseConvolutionLayer.get())->configure(
+            &input,
+            m_KernelTensor.get(),
+            m_BiasTensor.get(),
+            &output,
+            padStrideInfo,
+            depthMultiplier);
+    }
+    else
+    {
+        m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
+        static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
+            &input,
+            m_KernelTensor.get(),
+            m_BiasTensor.get(),
+            &output,
+            padStrideInfo,
+            depthMultiplier);
+    }
+
+    BOOST_ASSERT(m_DepthwiseConvolutionLayer);
+}
+
+template<armnn::DataType... dataTypes>
+void ClDepthwiseConvolutionBaseWorkload<dataTypes...>::FreeUnusedTensors()
+{
+    FreeTensorIfUnused(m_KernelTensor);
+    FreeTensorIfUnused(m_BiasTensor);
+}
+
+// Generate known implementations for linker
+template class ClDepthwiseConvolutionBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>;
+template class ClDepthwiseConvolutionBaseWorkload<armnn::DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp
new file mode 100644
index 0000000000..9d5cde30b6
--- /dev/null
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+#include <boost/optional.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+    const TensorInfo& output,
+    const DepthwiseConvolution2dDescriptor& descriptor,
+    const TensorInfo& weights,
+    const boost::optional<TensorInfo>& biases);
+
+template<armnn::DataType... dataTypes>
+class ClDepthwiseConvolutionBaseWorkload : public TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>
+{
+public:
+    using TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>::m_Data;
+
+    ClDepthwiseConvolutionBaseWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+                                       const WorkloadInfo& info);
+
+protected:
+    std::unique_ptr<arm_compute::IFunction> m_DepthwiseConvolutionLayer;
+
+    std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
+    std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
+
+    void FreeUnusedTensors();
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp
new file mode 100644
index 0000000000..17ecd29307
--- /dev/null
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionFloatWorkload.hpp" + +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClDepthwiseConvolutionFloatWorkload::ClDepthwiseConvolutionFloatWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClDepthwiseConvolutionBaseWorkload(descriptor, info) +{ + InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); + } + + m_DepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void ClDepthwiseConvolutionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionFloatWorkload_Execute"); + BOOST_ASSERT(m_DepthwiseConvolutionLayer); + + m_DepthwiseConvolutionLayer->run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp new file mode 100644 index 0000000000..4f9d5f332e --- /dev/null +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +namespace armnn +{ + +class ClDepthwiseConvolutionFloatWorkload : public ClDepthwiseConvolutionBaseWorkload +{ +public: + ClDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; +}; + +} //namespace armnn + + + + diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..22922e4df6 --- /dev/null +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionUint8Workload.hpp" + +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClDepthwiseConvolutionBaseWorkload(descriptor, info) +{ + InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias); + } + + m_DepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void ClDepthwiseConvolutionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionUint8Workload_Execute"); + BOOST_ASSERT(m_DepthwiseConvolutionLayer); + + m_DepthwiseConvolutionLayer->run(); +} + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp new file mode 100644 index 0000000000..b9f676de94 --- /dev/null +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp @@ -0,0 +1,23 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +namespace armnn +{ + +class ClDepthwiseConvolutionUint8Workload : public ClDepthwiseConvolutionBaseWorkload +{ +public: + ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; +}; + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp new file mode 100644 index 0000000000..a2d8534682 --- /dev/null +++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDivisionFloatWorkload.hpp" +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput); +} + + +ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + // Construct + m_ArithmeticDivision.configure(&input0, &input1, &output); +} + +void ClDivisionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute"); + + // Executes the layer. + m_ArithmeticDivision.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp new file mode 100644 index 0000000000..1aa7ec69f6 --- /dev/null +++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class ClDivisionFloatWorkload : public FloatWorkload +{ +public: + ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const + WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp new file mode 100644 index 0000000000..0a60fc3b5c --- /dev/null +++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClFloorFloatWorkload.hpp" +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void ClFloorFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp new file mode 100644 index 0000000000..513862a4d7 --- /dev/null +++ b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +class ClFloorFloatWorkload : public FloatWorkload +{ +public: + ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLFloor m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp new file mode 100644 index 0000000000..b3a97f35f8 --- /dev/null +++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp @@ -0,0 +1,96 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClFullyConnectedWorkload.hpp" +#include +#include +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiases; + arm_compute::TensorInfo *optionalAclBiases = nullptr; + if (descriptor.m_BiasEnabled) + { + aclBiases = BuildArmComputeTensorInfo(biases); + optionalAclBiases = &aclBiases; + } + + const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + + return arm_compute::CLFullyConnectedLayer::validate(&aclInput, + &aclWeights, + optionalAclBiases, + &aclOutput, + fullyConnectedLayerInfo); +} + +ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : BaseWorkload(descriptor, info) + , m_FullyConnectedLayer(memoryManager) +{ + m_WeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasesTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + } + + m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1); + + arm_compute::ICLTensor& input = 
static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + // Construct + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + + InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight); + + if (m_BiasesTensor) + { + InitializeArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_FullyConnectedLayer.prepare(); + FreeUnusedTensors(); +} + +void ClFullyConnectedWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute"); + m_FullyConnectedLayer.run(); +} + +void ClFullyConnectedWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_WeightsTensor); + FreeTensorIfUnused(m_BiasesTensor); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp new file mode 100644 index 0000000000..0c9047235b --- /dev/null +++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor); + +class ClFullyConnectedWorkload : public armnn::BaseWorkload +{ +public: + ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor, + const armnn::WorkloadInfo& info, + std::shared_ptr& memoryManager); + + using armnn::BaseWorkload::m_Data; + void Execute() const override; + +private: + mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer; + + std::unique_ptr m_WeightsTensor; + std::unique_ptr m_BiasesTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..edc13bcfea --- /dev/null +++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp @@ -0,0 +1,50 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClL2NormalizationFloatWorkload.hpp" +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const L2NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + + arm_compute::NormalizationLayerInfo normalizationInfo = + CreateAclNormalizationLayerInfoForL2Normalization(input); + + return arm_compute::CLNormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); +} + +void ClL2NormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + + + diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..f7b7911f4c --- /dev/null +++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const L2NormalizationDescriptor& descriptor); + +class ClL2NormalizationFloatWorkload : public FloatWorkload +{ +public: + ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + // Purposely not a CLL2Normalize function. See constructor. + mutable arm_compute::CLNormalizationLayer m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp new file mode 100644 index 0000000000..352698ad1b --- /dev/null +++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp @@ -0,0 +1,391 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClLstmFloatWorkload.hpp" +#include +#include +#include +#include + +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) + : FloatWorkload(descriptor, info) +{ + arm_compute::LSTMParams lstm_param; + + // Basic parameters + m_InputToForgetWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo()); + + m_InputToCellWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo()); + + m_InputToOutputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo()); + + m_RecurrentToForgetWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo()); + + m_RecurrentToCellWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo()); + + m_RecurrentToOutputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo()); + + m_ForgetGateBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo()); + + m_CellBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo()); + + m_OutputGateBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo()); + + // for future reference: check the AndroidNN API for the logic here + if (!m_Data.m_Parameters.m_CifgEnabled) + { + m_InputToInputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo()); + + m_RecurrentToInputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo()); + + m_CellToInputWeightsTensor = std::make_unique(); + if (m_Data.m_CellToInputWeights != nullptr) + { + BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo()); + } + + m_InputGateBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo()); + + lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(), + m_RecurrentToInputWeightsTensor.get(), + m_Data.m_CellToInputWeights != nullptr ? m_CellToInputWeightsTensor.get() : nullptr, + m_InputGateBiasTensor.get()); + } + + if (m_Data.m_Parameters.m_ProjectionEnabled) + { + m_ProjectionWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo()); + + m_ProjectionBiasTensor = std::make_unique(); + if (m_Data.m_ProjectionBias != nullptr) + { + BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo()); + } + + lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(), + m_Data.m_ProjectionBias != nullptr ? 
m_ProjectionBiasTensor.get() : nullptr); + } + + if (m_Data.m_Parameters.m_PeepholeEnabled) + { + m_CellToForgetWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo()); + + m_CellToOutputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo()); + + lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get()); + } + + const arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + const arm_compute::ICLTensor& output_state_in = static_cast(m_Data.m_Inputs[1])->GetTensor(); + const arm_compute::ICLTensor& cell_state_in = static_cast(m_Data.m_Inputs[2])->GetTensor(); + + arm_compute::ICLTensor& output_state_out = static_cast(m_Data.m_Outputs[1])->GetTensor(); + arm_compute::ICLTensor& cell_state_out = static_cast(m_Data.m_Outputs[2])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[3])->GetTensor(); + + // Get the batch_size and the num_units from the cellStateIn dimensions + const TensorInfo& inputTensorInfo = info.m_InputTensorInfos[2]; + const unsigned int batch_size = boost::numeric_cast(inputTensorInfo.GetShape()[0]); + const unsigned int num_units = boost::numeric_cast(inputTensorInfo.GetShape()[1]); + + m_ScratchBuffer = std::make_unique(); + if (m_Data.m_Parameters.m_CifgEnabled) + { + // 2D tensor with dimensions [num_units * 4, batch_size] with CIFG + armnn::TensorInfo scratchBuffer1({ batch_size, num_units * 4 }, DataType::Float32); + BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer1); + } + else + { + // scratch_buffer [num_units * 3, batch_size] without CIFG + armnn::TensorInfo scratchBuffer2({ batch_size, num_units * 3 }, DataType::Float32); + BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer2); + } + + float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell; + float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj; + + // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations + arm_compute::ActivationLayerInfo activationLayerInfo; + if (m_Data.m_Parameters.m_ActivationFunc == 0) + { + // no activation, do nothing + } + else if (m_Data.m_Parameters.m_ActivationFunc == 1) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::RELU); + } + else if (m_Data.m_Parameters.m_ActivationFunc == 3) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); + } + else if (m_Data.m_Parameters.m_ActivationFunc == 4) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); + } + else if (m_Data.m_Parameters.m_ActivationFunc == 6) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); + } + else + { + throw armnn::Exception("Wrong Type of Activation Function!"); + } + + + m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(), + m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(), + m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(), + m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), + &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out, + 
&cell_state_out, &output, lstm_param, activationLayerInfo, + cell_threshold, projection_threshold); + + armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer); + + InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights); + InitializeArmComputeClTensorData(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights); + InitializeArmComputeClTensorData(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights); + InitializeArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights); + InitializeArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights); + InitializeArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights); + InitializeArmComputeClTensorData(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias); + InitializeArmComputeClTensorData(*m_CellBiasTensor, m_Data.m_CellBias); + InitializeArmComputeClTensorData(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias); + + if (!m_Data.m_Parameters.m_CifgEnabled) + { + InitializeArmComputeClTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights); + InitializeArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights); + if (m_Data.m_CellToInputWeights != nullptr) + { + InitializeArmComputeClTensorData(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights); + } + InitializeArmComputeClTensorData(*m_InputGateBiasTensor, m_Data.m_InputGateBias); + } + + if (m_Data.m_Parameters.m_ProjectionEnabled) + { + InitializeArmComputeClTensorData(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights); + if (m_Data.m_ProjectionBias != nullptr) + { + InitializeArmComputeClTensorData(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias); + } + } + + if (m_Data.m_Parameters.m_PeepholeEnabled) + { + InitializeArmComputeClTensorData(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights); + InitializeArmComputeClTensorData(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_LstmLayer.prepare(); + FreeUnusedTensors(); +} + +void ClLstmFloatWorkload::Execute() const +{ + m_LstmLayer.run(); +} + +arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, + const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, + const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, + const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, + const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, + const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, + const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, + const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights) +{ + arm_compute::LSTMParams lstm_params_info; + + // The inputs and the outputs + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputStateInInfo = 
BuildArmComputeTensorInfo(outputStateIn); + const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn); + const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer); + const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut); + const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + // Basic parameters + const arm_compute::TensorInfo aclInputToForgetWeightsInfo = BuildArmComputeTensorInfo(inputToForgetWeights); + const arm_compute::TensorInfo aclInputToCellWeightsInfo = BuildArmComputeTensorInfo(inputToCellWeights); + const arm_compute::TensorInfo aclInputToOutputWeightsInfo = BuildArmComputeTensorInfo(inputToOutputWeights); + const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo + = BuildArmComputeTensorInfo(recurrentToForgetWeights); + const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo + = BuildArmComputeTensorInfo(recurrentToCellWeights); + const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo + = BuildArmComputeTensorInfo(recurrentToOutputWeights); + const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(forgetGateBias); + const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(cellBias); + const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(outputGateBias); + + arm_compute::TensorInfo aclInputToInputWeightsInfo; + arm_compute::TensorInfo aclRecurrentToInputWeightsInfo; + arm_compute::TensorInfo aclCellToInputWeightsInfo; + arm_compute::TensorInfo aclInputGateBiasInfo; + arm_compute::TensorInfo aclProjectionWeightsInfo; + arm_compute::TensorInfo aclProjectionBiasInfo; + arm_compute::TensorInfo aclCellToForgetWeightsInfo; + arm_compute::TensorInfo aclCellToOutputWeightsInfo; + + if (!descriptor.m_CifgEnabled) + { + armnn::TensorInfo inputToInputWInfo = *inputToInputWeights; + aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(inputToInputWInfo); + armnn::TensorInfo recurrentToInputWInfo = *recurrentToInputWeights; + aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(recurrentToInputWInfo); + + if (cellToInputWeights != nullptr) + { + armnn::TensorInfo cellToInputWInfo = *cellToInputWeights; + aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(cellToInputWInfo); + } + armnn::TensorInfo inputGateBiasInfo = *inputGateBias; + aclInputGateBiasInfo = BuildArmComputeTensorInfo(inputGateBiasInfo); + lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo, + cellToInputWeights != nullptr ? &aclCellToInputWeightsInfo: nullptr, + &aclInputGateBiasInfo); + } + + if (descriptor.m_ProjectionEnabled) + { + const armnn::TensorInfo& projectionWInfo = *projectionWeights; + aclProjectionWeightsInfo = BuildArmComputeTensorInfo(projectionWInfo); + + if (projectionBias != nullptr) + { + const armnn::TensorInfo& projectionBiasInfo = *projectionBias; + aclProjectionBiasInfo = BuildArmComputeTensorInfo(projectionBiasInfo); + } + lstm_params_info.set_projection_params(&aclProjectionWeightsInfo, + projectionBias != nullptr ? 
&aclProjectionBiasInfo: nullptr); + } + + if (descriptor.m_PeepholeEnabled) + { + const armnn::TensorInfo& cellToForgetWInfo = *cellToForgetWeights; + aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(cellToForgetWInfo); + const armnn::TensorInfo& cellToOutputWInfo = *cellToOutputWeights; + aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(cellToOutputWInfo); + lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo); + } + + float cell_threshold = descriptor.m_ClippingThresCell; + float projection_threshold = descriptor.m_ClippingThresProj; + + // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations + arm_compute::ActivationLayerInfo activationLayerInfo; + if (descriptor.m_ActivationFunc == 0) + { + // no activation, do nothing + } + else if (descriptor.m_ActivationFunc == 1) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::RELU); + } + else if (descriptor.m_ActivationFunc == 3) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); + } + else if (descriptor.m_ActivationFunc == 4) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); + } + else if (descriptor.m_ActivationFunc == 6) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); + } + else + { + throw armnn::Exception("Wrong Type of Activation Function!"); + } + + return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo, + &aclInputToCellWeightsInfo, + &aclInputToOutputWeightsInfo, + &aclRecurrentToForgetWeightsInfo, + &aclRecurrentToCellWeightsInfo, + &aclRecurrentToOutputWeightsInfo, + &aclForgetGateBiasInfo, + &aclCellBiasInfo, + &aclOutputGateBiasInfo, + &aclOutputStateInInfo, &aclCellStateInInfo, + &aclScratchBufferInfo, &aclOutputStateOutInfo, + &aclCellStateOutInfo, &aclOutputInfo, + lstm_params_info, activationLayerInfo, + cell_threshold, projection_threshold); +} + +void ClLstmFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_InputToInputWeightsTensor); + FreeTensorIfUnused(m_InputToForgetWeightsTensor); + FreeTensorIfUnused(m_InputToCellWeightsTensor); + FreeTensorIfUnused(m_InputToOutputWeightsTensor); + FreeTensorIfUnused(m_RecurrentToInputWeightsTensor); + FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor); + FreeTensorIfUnused(m_RecurrentToCellWeightsTensor); + FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor); + FreeTensorIfUnused(m_CellToInputWeightsTensor); + FreeTensorIfUnused(m_CellToForgetWeightsTensor); + FreeTensorIfUnused(m_CellToOutputWeightsTensor); + FreeTensorIfUnused(m_InputGateBiasTensor); + FreeTensorIfUnused(m_ForgetGateBiasTensor); + FreeTensorIfUnused(m_CellBiasTensor); + FreeTensorIfUnused(m_OutputGateBiasTensor); + FreeTensorIfUnused(m_ProjectionWeightsTensor); + FreeTensorIfUnused(m_ProjectionBiasTensor); + FreeTensorIfUnused(m_ScratchBuffer); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp new file mode 100644 index 0000000000..352d774a99 --- /dev/null +++ b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp @@ -0,0 +1,68 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +#include + +namespace armnn +{ + +class ClLstmFloatWorkload : public FloatWorkload +{ +public: + ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLLSTMLayer m_LstmLayer; + + std::unique_ptr m_InputToInputWeightsTensor; + std::unique_ptr m_InputToForgetWeightsTensor; + std::unique_ptr m_InputToCellWeightsTensor; + std::unique_ptr m_InputToOutputWeightsTensor; + std::unique_ptr m_RecurrentToInputWeightsTensor; + std::unique_ptr m_RecurrentToForgetWeightsTensor; + std::unique_ptr m_RecurrentToCellWeightsTensor; + std::unique_ptr m_RecurrentToOutputWeightsTensor; + std::unique_ptr m_CellToInputWeightsTensor; + std::unique_ptr m_CellToForgetWeightsTensor; + std::unique_ptr m_CellToOutputWeightsTensor; + std::unique_ptr m_InputGateBiasTensor; + std::unique_ptr m_ForgetGateBiasTensor; + std::unique_ptr m_CellBiasTensor; + std::unique_ptr m_OutputGateBiasTensor; + std::unique_ptr m_ProjectionWeightsTensor; + std::unique_ptr m_ProjectionBiasTensor; + + std::unique_ptr m_ScratchBuffer; + + void FreeUnusedTensors(); +}; + +arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor &descriptor, + const TensorInfo& inputToForgetWeights, + const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, + const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, + const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, + const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, + const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, + const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, + const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights); +} //namespace armnn diff --git a/src/backends/cl/workloads/ClMergerFloatWorkload.cpp b/src/backends/cl/workloads/ClMergerFloatWorkload.cpp new file mode 100644 index 0000000000..151f1e0ee7 --- /dev/null +++ b/src/backends/cl/workloads/ClMergerFloatWorkload.cpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMergerFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClMergerFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerFloatWorkload_Execute"); + ClBaseMergerWorkload::Execute(); +} + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClMergerFloatWorkload.hpp b/src/backends/cl/workloads/ClMergerFloatWorkload.hpp new file mode 100644 index 0000000000..9782f7a8f3 --- /dev/null +++ b/src/backends/cl/workloads/ClMergerFloatWorkload.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerFloatWorkload : public ClBaseMergerWorkload +{ +public: + using ClBaseMergerWorkload::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClMergerUint8Workload.cpp b/src/backends/cl/workloads/ClMergerUint8Workload.cpp new file mode 100644 index 0000000000..9d1060d857 --- /dev/null +++ b/src/backends/cl/workloads/ClMergerUint8Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMergerUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerUint8Workload_Execute"); + ClBaseMergerWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClMergerUint8Workload.hpp b/src/backends/cl/workloads/ClMergerUint8Workload.hpp new file mode 100644 index 0000000000..cbfc19a0f2 --- /dev/null +++ b/src/backends/cl/workloads/ClMergerUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerUint8Workload : public ClBaseMergerWorkload +{ +public: + using ClBaseMergerWorkload::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp new file mode 100644 index 0000000000..d53e149129 --- /dev/null +++ b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMultiplicationFloatWorkload.hpp" +#include +#include +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. 
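+    // Note: the constructor below passes TO_NEAREST_EVEN to configure(); given the
+    // fixed scale of 1.0f and the behaviour described above, validate() sticks
+    // with TO_ZERO here so that the check passes.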
+ return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1, + &aclInput2, + &aclOutput, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + + +ClMultiplicationFloatWorkload::ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClMultiplicationFloatWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + // Construct + m_PixelWiseMultiplication.configure(&input0, + &input1, + &output, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_NEAREST_EVEN); +} + +void ClMultiplicationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationFloatWorkload_Execute"); + + // Executes the layer. + m_PixelWiseMultiplication.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp new file mode 100644 index 0000000000..a793ac64df --- /dev/null +++ b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class ClMultiplicationFloatWorkload : public FloatWorkload +{ +public: + ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication; +}; + +} //namespace armnn + + + diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..969c9bb08b --- /dev/null +++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp @@ -0,0 +1,51 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
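// Editorial note, not part of the patch: the multiplication validate() above
// deliberately passes RoundingPolicy::TO_ZERO while configure() uses
// TO_NEAREST_EVEN; per the comment in the .cpp, ACL's validate() of this era
// rejected other policies for a 1.0f scale even though the policy is ignored for
// F32. A hedged usage sketch (helper name illustrative; include path assumes the
// new layout):

#include <backends/cl/workloads/ClMultiplicationFloatWorkload.hpp>

bool IsMultiplicationSupportedOnCl(const armnn::TensorInfo& input0,
                                   const armnn::TensorInfo& input1,
                                   const armnn::TensorInfo& output)
{
    const arm_compute::Status status = armnn::ClMultiplicationWorkloadValidate(input0, input1, output);
    return status.error_code() == arm_compute::ErrorCode::OK;
}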
+// SPDX-License-Identifier: MIT +// + +#include "ClNormalizationFloatWorkload.hpp" +#include +#include +#include +#include +#include +#include "ClWorkloadUtils.hpp" + +using namespace armnn::armcomputetensorutils; + +namespace armnn +{ + +arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + + arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor); + + return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters); + + m_NormalizationLayer.configure(&input, &output, normalizationInfo); +}; + +void ClNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute"); + m_NormalizationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..f30be91aaa --- /dev/null +++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor); + +class ClNormalizationFloatWorkload : public FloatWorkload +{ +public: + ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLNormalizationLayer m_NormalizationLayer; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp new file mode 100644 index 0000000000..45dc5e8be7 --- /dev/null +++ b/src/backends/cl/workloads/ClPadWorkload.cpp @@ -0,0 +1,63 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
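// Editorial note, not part of the patch: a minimal sketch of driving the
// ClNormalizationWorkloadValidate helper above with an armnn descriptor. The
// helper name and the descriptor field values are illustrative, not from the
// patch; the include path assumes the new layout.

#include <backends/cl/workloads/ClNormalizationFloatWorkload.hpp>

bool IsCrossChannelNormSupportedOnCl(const armnn::TensorInfo& input, const armnn::TensorInfo& output)
{
    armnn::NormalizationDescriptor descriptor;
    descriptor.m_NormChannelType = armnn::NormalizationAlgorithmChannel::Across;
    descriptor.m_NormMethodType  = armnn::NormalizationAlgorithmMethod::LocalBrightness;
    descriptor.m_NormSize        = 5;    // cross-channel window size, illustrative
    descriptor.m_Alpha           = 1.0f; // illustrative LRN parameters
    descriptor.m_Beta            = 0.75f;
    descriptor.m_K               = 1.0f;

    return armnn::ClNormalizationWorkloadValidate(input, output, descriptor).error_code()
           == arm_compute::ErrorCode::OK;
}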
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPadWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/ArmComputeTensorUtils.hpp>
+#include <arm_compute/core/Types.h>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+template <armnn::DataType... DataTypes>
+ClPadWorkload<DataTypes...>::ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info)
+: TypedWorkload<PadQueueDescriptor, DataTypes...>(descriptor, info)
+{
+    this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1);
+
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(descriptor.m_Parameters.m_PadList);
+
+    m_Layer.configure(&input, &output, padList);
+}
+
+template <armnn::DataType... DataTypes>
+void ClPadWorkload<DataTypes...>::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClPadWorkload_Execute");
+    m_Layer.run();
+}
+
+bool ClPadValidate(const TensorInfo& input,
+                   const TensorInfo& output,
+                   const PadDescriptor& descriptor,
+                   std::string* reasonIfUnsupported)
+{
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+    arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(descriptor.m_PadList);
+
+    const arm_compute::Status aclStatus = arm_compute::CLPadLayer::validate(&aclInputInfo,
+                                                                            &aclOutputInfo,
+                                                                            padList);
+
+    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+    if (!supported && reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = aclStatus.error_description();
+    }
+
+    return supported;
+}
+
+} // namespace armnn
+
+template class armnn::ClPadWorkload<armnn::DataType::Float16, armnn::DataType::Float32>;
+template class armnn::ClPadWorkload<armnn::DataType::QuantisedAsymm8>;
diff --git a/src/backends/cl/workloads/ClPadWorkload.hpp b/src/backends/cl/workloads/ClPadWorkload.hpp
new file mode 100644
index 0000000000..a7ad6670a7
--- /dev/null
+++ b/src/backends/cl/workloads/ClPadWorkload.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+#include <backends/WorkloadData.hpp>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn {
+
+template <armnn::DataType... DataTypes>
+class ClPadWorkload : public TypedWorkload<PadQueueDescriptor, DataTypes...>
+{
+public:
+    ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLPadLayer m_Layer;
+};
+
+bool ClPadValidate(const TensorInfo& input,
+                   const TensorInfo& output,
+                   const PadDescriptor& descriptor,
+                   std::string* reasonIfUnsupported);
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp
new file mode 100644
index 0000000000..079772dbaf
--- /dev/null
+++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp
@@ -0,0 +1,56 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
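// Editorial note, not part of the patch: ClPadValidate above reports the ACL
// failure reason through its out-parameter. A hedged sketch of that pattern
// (helper name illustrative; include path assumes the new layout):

#include <backends/cl/workloads/ClPadWorkload.hpp>
#include <iostream>
#include <string>

void CheckPadSupport(const armnn::TensorInfo& input,
                     const armnn::TensorInfo& output,
                     const armnn::PadDescriptor& descriptor)
{
    std::string reason;
    if (!armnn::ClPadValidate(input, output, descriptor, &reason))
    {
        // The reason string carries ACL's own error description.
        std::cerr << "CL pad not supported: " << reason << "\n";
    }
}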
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPermuteWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/core/Error.h>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor)
+{
+    const armnn::PermutationVector& perm = descriptor.m_DimMappings;
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!perm.IsEqual({ 0U, 3U, 1U, 2U })
+                                    && !perm.IsEqual({ 0U, 2U, 3U, 1U })
+                                    && !perm.IsEqual({ 3U, 2U, 0U, 1U }),
+        "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported");
+
+    return arm_compute::Status{};
+}
+
+template <armnn::DataType... DataTypes>
+ClPermuteWorkload<DataTypes...>::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : TypedWorkload<PermuteQueueDescriptor, DataTypes...>(descriptor, info)
+{
+    using armcomputetensorutils::BuildArmComputePermutationVector;
+
+    m_Data.ValidateInputsOutputs(GetName(), 1, 1);
+
+    const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
+
+    // Run the layer.
+    m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
+}
+
+template <armnn::DataType... DataTypes>
+void ClPermuteWorkload<DataTypes...>::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute");
+    m_PermuteFunction.run();
+}
+
+template class ClPermuteWorkload<DataType::Float16, DataType::Float32>;
+template class ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.hpp b/src/backends/cl/workloads/ClPermuteWorkload.hpp
new file mode 100644
index 0000000000..8ff5707ad6
--- /dev/null
+++ b/src/backends/cl/workloads/ClPermuteWorkload.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+#include <backends/WorkloadData.hpp>
+
+#include <armnn/TypesUtils.hpp>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+#include <string>
+
+namespace armnn
+{
+
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor);
+
+template <armnn::DataType... DataTypes>
+class ClPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...>
+{
+public:
+    static const std::string& GetName()
+    {
+        static const std::string name = std::string("ClPermuteWorkload");
+        return name;
+    }
+
+    ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    using TypedWorkload<PermuteQueueDescriptor, DataTypes...>::m_Data;
+    mutable arm_compute::CLPermute m_PermuteFunction;
+};
+
+using ClPermuteFloatWorkload = ClPermuteWorkload<DataType::Float16, DataType::Float32>;
+using ClPermuteUint8Workload = ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp b/src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp
new file mode 100644
index 0000000000..98911856fe
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
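// Editorial note, not part of the patch: in armnn a PermutationVector maps source
// dimension i to destination dimension mappings[i], which is why only the three
// mappings listed in the validate() above are accepted. A minimal, self-contained
// sketch of what [0, 3, 1, 2] does to a 4-D shape (uses only public armnn headers):

#include <armnn/Types.hpp>
#include <array>
#include <cstdio>

int main()
{
    const armnn::PermutationVector perm({ 0, 3, 1, 2 }); // source dim i -> destination dim perm[i]
    const std::array<unsigned int, 4> srcShape = { 1, 2, 3, 4 };
    std::array<unsigned int, 4> dstShape{};
    for (unsigned int i = 0; i < 4; ++i)
    {
        dstShape[perm[i]] = srcShape[i]; // e.g. source dim 1 lands in destination dim 3
    }
    std::printf("%u %u %u %u\n", dstShape[0], dstShape[1], dstShape[2], dstShape[3]); // prints: 1 3 4 2
    return 0;
}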
+// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dBaseWorkload.hpp" +#include +#include +#include +#include + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); + + return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +template +ClPooling2dBaseWorkload::ClPooling2dBaseWorkload( + const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) + : TypedWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); + + // Run the layer. + m_PoolingLayer.configure(&input, &output, layerInfo); +} + +template class ClPooling2dBaseWorkload; +template class ClPooling2dBaseWorkload; + +} diff --git a/src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp b/src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp new file mode 100644 index 0000000000..8f9db08ddc --- /dev/null +++ b/src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor); + +// Base class template providing an implementation of the Pooling2d layer common to all data types. +template +class ClPooling2dBaseWorkload : public TypedWorkload +{ +public: + using TypedWorkload::m_Data; + + ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, + const std::string& name); + +protected: + mutable arm_compute::CLPoolingLayer m_PoolingLayer; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp b/src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp new file mode 100644 index 0000000000..dc9d17f0ae --- /dev/null +++ b/src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClPooling2dFloatWorkload::ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload(descriptor, info, "ClPooling2dFloatWorkload") +{ +} + +void ClPooling2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dFloatWorkload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp b/src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp new file mode 100644 index 0000000000..ba9294c40f --- /dev/null +++ b/src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
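// Editorial note, not part of the patch: ClPooling2dBaseWorkload configures the ACL
// layer once in its constructor, so the Float and Uint8 subclasses only add a
// profiled Execute(). A hedged sketch of the shared validate helper (helper name
// and descriptor values illustrative; include path assumes the new layout):

#include <backends/cl/workloads/ClPooling2dBaseWorkload.hpp>

bool IsMaxPool2x2SupportedOnCl(const armnn::TensorInfo& input, const armnn::TensorInfo& output)
{
    armnn::Pooling2dDescriptor descriptor;
    descriptor.m_PoolType   = armnn::PoolingAlgorithm::Max;
    descriptor.m_PoolWidth  = 2; // illustrative 2x2, stride-2 pooling
    descriptor.m_PoolHeight = 2;
    descriptor.m_StrideX    = 2;
    descriptor.m_StrideY    = 2;

    return armnn::ClPooling2dWorkloadValidate(input, output, descriptor).error_code()
           == arm_compute::ErrorCode::OK;
}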
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ +class ClPooling2dFloatWorkload : public ClPooling2dBaseWorkload +{ +public: + ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClPooling2dUint8Workload.cpp b/src/backends/cl/workloads/ClPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..0b4b15f806 --- /dev/null +++ b/src/backends/cl/workloads/ClPooling2dUint8Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload(descriptor, info, "ClPooling2dUint8Workload") +{ +} + +void ClPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dUint8Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClPooling2dUint8Workload.hpp b/src/backends/cl/workloads/ClPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..b07f955343 --- /dev/null +++ b/src/backends/cl/workloads/ClPooling2dUint8Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include "ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload +{ +public: + ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClReshapeFloatWorkload.cpp b/src/backends/cl/workloads/ClReshapeFloatWorkload.cpp new file mode 100644 index 0000000000..4da3bbd703 --- /dev/null +++ b/src/backends/cl/workloads/ClReshapeFloatWorkload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClReshapeFloatWorkload.hpp" +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClReshapeFloatWorkload::ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void ClReshapeFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClReshapeFloatWorkload.hpp b/src/backends/cl/workloads/ClReshapeFloatWorkload.hpp new file mode 100644 index 0000000000..e5fc20ec8b --- /dev/null +++ b/src/backends/cl/workloads/ClReshapeFloatWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +class ClReshapeFloatWorkload : public FloatWorkload +{ +public: + ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClReshapeUint8Workload.cpp b/src/backends/cl/workloads/ClReshapeUint8Workload.cpp new file mode 100644 index 0000000000..8fbee151fc --- /dev/null +++ b/src/backends/cl/workloads/ClReshapeUint8Workload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClReshapeUint8Workload.hpp" +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output); +} + +void ClReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeUint8Workload_Execute"); + + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClReshapeUint8Workload.hpp b/src/backends/cl/workloads/ClReshapeUint8Workload.hpp new file mode 100644 index 0000000000..654437a4c1 --- /dev/null +++ b/src/backends/cl/workloads/ClReshapeUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +// Reshape +class ClReshapeUint8Workload : public Uint8Workload +{ +public: + ClReshapeUint8Workload( const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp new file mode 100644 index 0000000000..499466e959 --- /dev/null +++ b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClResizeBilinearFloatWorkload.hpp" +#include +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClResizeBilinearFloatWorkload::ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClResizeBilinearFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR, + arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f), + arm_compute::SamplingPolicy::TOP_LEFT); +}; + +void ClResizeBilinearFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeBilinearFloatWorkload_Execute"); + m_ResizeBilinearLayer.run(); +} + + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp new file mode 100644 index 0000000000..f29f416907 --- /dev/null +++ b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +class ClResizeBilinearFloatWorkload : public FloatWorkload +{ +public: + ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLScale m_ResizeBilinearLayer; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp new file mode 100644 index 0000000000..eb05a19670 --- /dev/null +++ b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSoftmaxBaseWorkload.hpp" + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output) +{ + // NOTE: We report 4D Softmax as unsupported until full support is added to ACL + if(input.GetShape().GetNumDimensions() >= 4u) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); + } + + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo); +} + +} diff --git a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp new file mode 100644 index 0000000000..b800056cdf --- /dev/null +++ b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
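// Editorial note, not part of the patch: ClSoftmaxWorkloadValidate above rejects
// tensors of rank 4 or more itself, before consulting ACL, so callers get a uniform
// arm_compute::Status either way. A hedged sketch (helper name illustrative; include
// path assumes the new layout):

#include <backends/cl/workloads/ClSoftmaxBaseWorkload.hpp>

bool IsSoftmaxSupportedOnCl(const armnn::TensorInfo& input, const armnn::TensorInfo& output)
{
    const arm_compute::Status status = armnn::ClSoftmaxWorkloadValidate(input, output);
    return status.error_code() == arm_compute::ErrorCode::OK; // false for any 4-D input
}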
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output); + +} // namespace armnn diff --git a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp new file mode 100644 index 0000000000..606005659f --- /dev/null +++ b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSoftmaxFloatWorkload.hpp" +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClSoftmaxFloatWorkload::ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("ClSoftmaxFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); +} + +void ClSoftmaxFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxFloatWorkload_Execute"); + m_SoftmaxLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp new file mode 100644 index 0000000000..b400b3c7ea --- /dev/null +++ b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include + +#include + +namespace armnn +{ + +class ClSoftmaxFloatWorkload : public FloatWorkload +{ +public: + ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp b/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..7e0589e89f --- /dev/null +++ b/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSoftmaxUint8Workload.hpp" +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : Uint8Workload(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + const auto outputQuantization = output.info()->quantization_info(); + + if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) + { + throw InvalidArgumentException( + "Invalid quantization for output. 
Only scale = 1.0f / 256.0f and offset = 0 supported"); + } + + m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); +} + +void ClSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxUint8Workload_Execute"); + + m_SoftmaxLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp b/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..4786faf60b --- /dev/null +++ b/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ +// Softmax +class ClSoftmaxUint8Workload : public Uint8Workload +{ +public: + ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + + void Execute() const override; +private: + + mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/cl/workloads/ClSplitterFloatWorkload.cpp b/src/backends/cl/workloads/ClSplitterFloatWorkload.cpp new file mode 100644 index 0000000000..5fd634bdb6 --- /dev/null +++ b/src/backends/cl/workloads/ClSplitterFloatWorkload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSplitterFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClSplitterFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterFloatWorkload_Execute"); + ClBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClSplitterFloatWorkload.hpp b/src/backends/cl/workloads/ClSplitterFloatWorkload.hpp new file mode 100644 index 0000000000..a0b5846f8e --- /dev/null +++ b/src/backends/cl/workloads/ClSplitterFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class ClSplitterFloatWorkload : public ClBaseSplitterWorkload +{ +public: + using ClBaseSplitterWorkload::ClBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClSplitterUint8Workload.cpp b/src/backends/cl/workloads/ClSplitterUint8Workload.cpp new file mode 100644 index 0000000000..50a251ada7 --- /dev/null +++ b/src/backends/cl/workloads/ClSplitterUint8Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSplitterUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterUint8Workload_Execute"); + ClBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClSplitterUint8Workload.hpp b/src/backends/cl/workloads/ClSplitterUint8Workload.hpp new file mode 100644 index 0000000000..19e8be5034 --- /dev/null +++ b/src/backends/cl/workloads/ClSplitterUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
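// Editorial note, not part of the patch: ClSoftmaxUint8Workload above throws unless
// the output is quantised with scale 1/256 and offset 0, the only QAsymm8
// representation that spans softmax's [0, 1) range exactly. A hedged sketch of a
// conforming output TensorInfo (helper name illustrative):

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

armnn::TensorInfo MakeSoftmaxUint8Output(const armnn::TensorShape& shape)
{
    armnn::TensorInfo output(shape, armnn::DataType::QuantisedAsymm8);
    output.SetQuantizationScale(1.0f / 256.0f); // required by the workload above
    output.SetQuantizationOffset(0);
    return output;
}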
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseSplitterWorkload.hpp" + +namespace armnn +{ +class ClSplitterUint8Workload : public ClBaseSplitterWorkload +{ +public: + using ClBaseSplitterWorkload::ClBaseSplitterWorkload; + virtual void Execute() const override; +}; +} //namespace armnn + + + diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp new file mode 100644 index 0000000000..37b334d94e --- /dev/null +++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSubtractionWorkload.hpp" + +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +template +ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(this->m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); +} + +template +void ClSubtractionWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute"); + m_Layer.run(); +} + +bool ClSubtractionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info, + &aclInput1Info, + &aclOutputInfo, + g_AclConvertPolicy); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return supported; +} + +} //namespace armnn + +template class armnn::ClSubtractionWorkload; +template class armnn::ClSubtractionWorkload; diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.hpp b/src/backends/cl/workloads/ClSubtractionWorkload.hpp new file mode 100644 index 0000000000..67b219b09d --- /dev/null +++ b/src/backends/cl/workloads/ClSubtractionWorkload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+template <armnn::DataType... DataTypes>
+class ClSubtractionWorkload : public TypedWorkload<SubtractionQueueDescriptor, DataTypes...>
+{
+public:
+    ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLArithmeticSubtraction m_Layer;
+};
+
+bool ClSubtractionValidate(const TensorInfo& input0,
+                           const TensorInfo& input1,
+                           const TensorInfo& output,
+                           std::string* reasonIfUnsupported);
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloadUtils.hpp b/src/backends/cl/workloads/ClWorkloadUtils.hpp
new file mode 100644
index 0000000000..3a8ff00bb6
--- /dev/null
+++ b/src/backends/cl/workloads/ClWorkloadUtils.hpp
@@ -0,0 +1,63 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "OpenClTimer.hpp"
+#include <backends/ArmComputeTensorUtils.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include <Half.hpp>
+
+#define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \
+    ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \
+                                                  name, \
+                                                  armnn::OpenClTimer(), \
+                                                  armnn::WallClockTimer())
+
+namespace armnn
+{
+
+template <typename T>
+void CopyArmComputeClTensorData(arm_compute::CLTensor& dstTensor, const T* srcData)
+{
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
+        dstTensor.map(true);
+    }
+
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
+        armcomputetensorutils::CopyArmComputeITensorData(srcData, dstTensor);
+    }
+
+    dstTensor.unmap();
+}
+
+inline void InitializeArmComputeClTensorData(arm_compute::CLTensor& clTensor,
+                                             const ConstCpuTensorHandle* handle)
+{
+    BOOST_ASSERT(handle);
+
+    armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
+    switch(handle->GetTensorInfo().GetDataType())
+    {
+        case DataType::Float16:
+            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<Half>());
+            break;
+        case DataType::Float32:
+            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
+            break;
+        case DataType::QuantisedAsymm8:
+            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>());
+            break;
+        case DataType::Signed32:
+            CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>());
+            break;
+        default:
+            BOOST_ASSERT_MSG(false, "Unexpected tensor type.");
+    }
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
new file mode 100644
index 0000000000..3329f42e08
--- /dev/null
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
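// Editorial note, not part of the patch: CopyArmComputeClTensorData above maps the
// CL buffer, copies on the host under a profiled scope, then unmaps, and
// InitializeArmComputeClTensorData dispatches on the handle's data type. A hedged
// sketch of initialising constant weights (helper name illustrative; include path
// assumes the new layout):

#include <backends/cl/workloads/ClWorkloadUtils.hpp>

void UploadConstWeights(arm_compute::CLTensor& weightsTensor,
                        const armnn::ConstCpuTensorHandle* weightsHandle)
{
    // Allocates the CL-side storage, then performs the typed map/copy/unmap above.
    armnn::InitializeArmComputeClTensorData(weightsTensor, weightsHandle);
}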
+// SPDX-License-Identifier: MIT +// + +#pragma once +#include "ClActivationFloatWorkload.hpp" +#include "ClActivationUint8Workload.hpp" +#include "ClAdditionWorkload.hpp" +#include "ClBaseConstantWorkload.hpp" +#include "ClBaseMergerWorkload.hpp" +#include "ClBatchNormalizationFloatWorkload.hpp" +#include "ClConstantFloatWorkload.hpp" +#include "ClConstantUint8Workload.hpp" +#include "ClConvolution2dFloatWorkload.hpp" +#include "ClConvolution2dUint8Workload.hpp" +#include "ClDepthwiseConvolutionFloatWorkload.hpp" +#include "ClDepthwiseConvolutionUint8Workload.hpp" +#include "ClDivisionFloatWorkload.hpp" +#include "ClFloorFloatWorkload.hpp" +#include "ClFullyConnectedWorkload.hpp" +#include "ClL2NormalizationFloatWorkload.hpp" +#include "ClLstmFloatWorkload.hpp" +#include "ClMergerFloatWorkload.hpp" +#include "ClMergerUint8Workload.hpp" +#include "ClMultiplicationFloatWorkload.hpp" +#include "ClNormalizationFloatWorkload.hpp" +#include "ClPermuteWorkload.hpp" +#include "ClPadWorkload.hpp" +#include "ClPooling2dFloatWorkload.hpp" +#include "ClPooling2dUint8Workload.hpp" +#include "ClReshapeFloatWorkload.hpp" +#include "ClReshapeUint8Workload.hpp" +#include "ClResizeBilinearFloatWorkload.hpp" +#include "ClSoftmaxFloatWorkload.hpp" +#include "ClSoftmaxUint8Workload.hpp" +#include "ClSplitterFloatWorkload.hpp" +#include "ClSplitterUint8Workload.hpp" +#include "ClSubtractionWorkload.hpp" +#include "ClConvertFp16ToFp32Workload.hpp" +#include "ClConvertFp32ToFp16Workload.hpp" diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp index 86ec31c71d..84e341fe64 100644 --- a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp @@ -4,10 +4,10 @@ // #include "NeonConvertFp16ToFp32Workload.hpp" -#include "Half.hpp" -#include "FloatingPointConverter.hpp" +#include +#include -#include "backends/WorkloadUtils.hpp" +#include namespace armnn { diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp index 0f4fbe4e93..261de3d0ce 100644 --- a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp @@ -5,11 +5,11 @@ #include "NeonConvertFp32ToFp16Workload.hpp" -#include "Half.hpp" -#include "FloatingPointConverter.hpp" +#include +#include -#include "Profiling.hpp" -#include "backends/WorkloadUtils.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/Merger.hpp b/src/backends/reference/workloads/Merger.hpp index 867925faa2..61c1311905 100644 --- a/src/backends/reference/workloads/Merger.hpp +++ b/src/backends/reference/workloads/Merger.hpp @@ -7,8 +7,7 @@ #include "RefWorkloadUtils.hpp" -#include "backends/WorkloadData.hpp" - +#include #include namespace armnn diff --git a/src/backends/reference/workloads/RefActivationFloat32Workload.hpp b/src/backends/reference/workloads/RefActivationFloat32Workload.hpp index 0de33f02ff..73be6b05bd 100644 --- a/src/backends/reference/workloads/RefActivationFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefActivationFloat32Workload.hpp @@ -5,7 +5,7 @@ #pragma once -#include "backends/Workload.hpp" +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefActivationUint8Workload.hpp b/src/backends/reference/workloads/RefActivationUint8Workload.hpp index f38888a9f7..4b8cc1a418 100644 --- 
a/src/backends/reference/workloads/RefActivationUint8Workload.hpp +++ b/src/backends/reference/workloads/RefActivationUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefArithmeticWorkload.hpp b/src/backends/reference/workloads/RefArithmeticWorkload.hpp index 7197b7a883..be89a3222f 100644 --- a/src/backends/reference/workloads/RefArithmeticWorkload.hpp +++ b/src/backends/reference/workloads/RefArithmeticWorkload.hpp @@ -6,9 +6,9 @@ #pragma once #include -#include "backends/StringMapping.hpp" -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefBaseConstantWorkload.hpp b/src/backends/reference/workloads/RefBaseConstantWorkload.hpp index 8dc9fd6104..9d125e422d 100644 --- a/src/backends/reference/workloads/RefBaseConstantWorkload.hpp +++ b/src/backends/reference/workloads/RefBaseConstantWorkload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include #include diff --git a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp index 15c843c2ca..17f80ca5e0 100644 --- a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp index d3e8e0a120..854ba1aea5 100644 --- a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp +++ b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp index 5e841ba34f..475b47f901 100644 --- a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp +++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp index 0754fd5c79..2d5560933d 100644 --- a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp +++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp index 34489e807c..4f2700d22e 100644 --- a/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefConvolution2dFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include 
"backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp b/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp index 0e2dd6aada..bbd77cebc4 100644 --- a/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp +++ b/src/backends/reference/workloads/RefConvolution2dUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp index 8f1227e2de..516d80941f 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp index c615cf7880..3cb82c4be5 100644 --- a/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp +++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp index 523fdcff50..89a907b8b8 100644 --- a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefFloorFloat32Workload.hpp b/src/backends/reference/workloads/RefFloorFloat32Workload.hpp index d7cfa50365..ffc4541a85 100644 --- a/src/backends/reference/workloads/RefFloorFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefFloorFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp index ce058690ac..c296f99e1c 100644 --- a/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefFullyConnectedFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp index e489cc7d81..3a15d9913a 100644 --- a/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp +++ b/src/backends/reference/workloads/RefFullyConnectedUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp 
b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp index a3f03f3060..67055a9c37 100644 --- a/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefL2NormalizationFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefLstmFloat32Workload.hpp b/src/backends/reference/workloads/RefLstmFloat32Workload.hpp index fc4f7776c6..79781c6971 100644 --- a/src/backends/reference/workloads/RefLstmFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefLstmFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefMergerFloat32Workload.hpp b/src/backends/reference/workloads/RefMergerFloat32Workload.hpp index 23a523c852..0c659231d2 100644 --- a/src/backends/reference/workloads/RefMergerFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefMergerFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefMergerUint8Workload.hpp b/src/backends/reference/workloads/RefMergerUint8Workload.hpp index 65dc42120a..f19bd22cb1 100644 --- a/src/backends/reference/workloads/RefMergerUint8Workload.hpp +++ b/src/backends/reference/workloads/RefMergerUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp index e30356c422..2358c60aec 100644 --- a/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefPermuteWorkload.hpp b/src/backends/reference/workloads/RefPermuteWorkload.hpp index d72cf77e74..2cc176d205 100644 --- a/src/backends/reference/workloads/RefPermuteWorkload.hpp +++ b/src/backends/reference/workloads/RefPermuteWorkload.hpp @@ -5,7 +5,7 @@ #pragma once -#include "backends/Workload.hpp" +#include #include diff --git a/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp b/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp index 501fb71aff..6387cb25c5 100644 --- a/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefPooling2dFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp b/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp index 6544f9a785..3802051015 100644 --- a/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp +++ b/src/backends/reference/workloads/RefPooling2dUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git 
a/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp b/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp index 9281e89cf7..62de71a74b 100644 --- a/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefReshapeFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefReshapeUint8Workload.hpp b/src/backends/reference/workloads/RefReshapeUint8Workload.hpp index b37fb4bdeb..25d1001d70 100644 --- a/src/backends/reference/workloads/RefReshapeUint8Workload.hpp +++ b/src/backends/reference/workloads/RefReshapeUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp index 0fff7ee695..1a8b5ba8a5 100644 --- a/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefResizeBilinearFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp index bbaf899ca6..50a0c50a83 100644 --- a/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp +++ b/src/backends/reference/workloads/RefResizeBilinearUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp index d37f2b5990..8af0cc51e7 100644 --- a/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefSoftmaxFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp b/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp index b179d529da..fe6a0d1a82 100644 --- a/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp +++ b/src/backends/reference/workloads/RefSoftmaxUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp b/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp index 12176dd277..5d6ef653c1 100644 --- a/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp +++ b/src/backends/reference/workloads/RefSplitterFloat32Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include "backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefSplitterUint8Workload.hpp b/src/backends/reference/workloads/RefSplitterUint8Workload.hpp index e80cb1a654..5dac92a73b 100644 --- a/src/backends/reference/workloads/RefSplitterUint8Workload.hpp +++ b/src/backends/reference/workloads/RefSplitterUint8Workload.hpp @@ -5,8 +5,8 @@ #pragma once -#include 
"backends/Workload.hpp" -#include "backends/WorkloadData.hpp" +#include +#include namespace armnn { diff --git a/src/backends/reference/workloads/RefWorkloadUtils.hpp b/src/backends/reference/workloads/RefWorkloadUtils.hpp index 616a875028..153c519350 100644 --- a/src/backends/reference/workloads/RefWorkloadUtils.hpp +++ b/src/backends/reference/workloads/RefWorkloadUtils.hpp @@ -5,7 +5,7 @@ #pragma once -#include "backends/CpuTensorHandle.hpp" +#include #include #include diff --git a/src/backends/reference/workloads/Splitter.hpp b/src/backends/reference/workloads/Splitter.hpp index e9c0379c9e..4d6f673359 100644 --- a/src/backends/reference/workloads/Splitter.hpp +++ b/src/backends/reference/workloads/Splitter.hpp @@ -7,8 +7,7 @@ #include "RefWorkloadUtils.hpp" -#include "backends/WorkloadData.hpp" - +#include #include #include diff --git a/src/backends/test/ActivationTestImpl.hpp b/src/backends/test/ActivationTestImpl.hpp index a5d327c287..e7d3d6a9c1 100644 --- a/src/backends/test/ActivationTestImpl.hpp +++ b/src/backends/test/ActivationTestImpl.hpp @@ -9,11 +9,11 @@ #include #include -#include "test/TensorHelpers.hpp" +#include #include "QuantizeHelper.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" +#include +#include #include "ActivationFixture.hpp" #include @@ -122,7 +122,7 @@ LayerTestResult BoundedReLuUpperBoundOnlyTest(armnn::IWorkloadFactory& 0.999f, 1.2f, 0.89f, 6.1f, }; - // Calculated manually. + // Calculated manually. std::vector output = std::vector{ 0.0f, 0.1f, 0.5f, 6.0f, 0.786f, 5.9875f, 0.0f, 0.384f, @@ -147,7 +147,7 @@ LayerTestResult BoundedReLuUint8UpperBoundOnlyTest(armnn::IWorkloadF 251, 8, 92 }; - // Calculated manually. + // Calculated manually. std::vector output = std::vector{ 0, 122, 0, 255, 0, 58 diff --git a/src/backends/test/ArmComputeCl.cpp b/src/backends/test/ArmComputeCl.cpp index 5ff94c1056..4f1a84dfad 100644 --- a/src/backends/test/ArmComputeCl.cpp +++ b/src/backends/test/ArmComputeCl.cpp @@ -7,10 +7,10 @@ #include "LayerTests.hpp" #include -#include -#include +#include +#include #include -#include +#include #include "ActivationFixture.hpp" #include "ClContextControlFixture.hpp" diff --git a/src/backends/test/BatchNormTestImpl.hpp b/src/backends/test/BatchNormTestImpl.hpp index 7126db9074..35f4e4c89c 100644 --- a/src/backends/test/BatchNormTestImpl.hpp +++ b/src/backends/test/BatchNormTestImpl.hpp @@ -8,12 +8,12 @@ #include #include -#include "test/TensorHelpers.hpp" +#include -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" +#include +#include -#include "backends/test/QuantizeHelper.hpp" +#include template diff --git a/src/backends/test/ClContextControlFixture.hpp b/src/backends/test/ClContextControlFixture.hpp index 54c5a4f505..c81428ff82 100644 --- a/src/backends/test/ClContextControlFixture.hpp +++ b/src/backends/test/ClContextControlFixture.hpp @@ -5,7 +5,7 @@ #pragma once -#include "backends/ClContextControl.hpp" +#include template struct ClContextControlFixtureBase diff --git a/src/backends/test/Conv2dTestImpl.hpp b/src/backends/test/Conv2dTestImpl.hpp index eb7165bf09..ce193659a1 100644 --- a/src/backends/test/Conv2dTestImpl.hpp +++ b/src/backends/test/Conv2dTestImpl.hpp @@ -9,11 +9,11 @@ #include #include -#include "test/TensorHelpers.hpp" +#include #include "QuantizeHelper.hpp" -#include "backends/CpuTensorHandle.hpp" -#include "backends/WorkloadFactory.hpp" +#include +#include // Mapping from input type to bias type for fully connected layers. 
// float => float, uint8_t => int32_t diff --git a/src/backends/test/CreateWorkloadCl.cpp b/src/backends/test/CreateWorkloadCl.cpp index cc0e12d202..078ef8c52d 100644 --- a/src/backends/test/CreateWorkloadCl.cpp +++ b/src/backends/test/CreateWorkloadCl.cpp @@ -2,15 +2,15 @@ // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // -#include +#include #include #include -#include -#include -#include +#include +#include +#include #include "ClContextControlFixture.hpp" -#include "test/CreateWorkloadClNeon.hpp" +#include boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle, std::initializer_list expectedDimensions) diff --git a/src/backends/test/IsLayerSupportedTest.cpp b/src/backends/test/IsLayerSupportedTest.cpp index 7117e20db5..089822dade 100644 --- a/src/backends/test/IsLayerSupportedTest.cpp +++ b/src/backends/test/IsLayerSupportedTest.cpp @@ -4,7 +4,7 @@ // #include -#include "test/TensorHelpers.hpp" +#include #include "LayerTests.hpp" #include @@ -13,13 +13,13 @@ #include #include -#include +#include #include "IsLayerSupportedTestImpl.hpp" -#include "ClContextControlFixture.hpp" +#include -#include "layers/ConvertFp16ToFp32Layer.hpp" -#include "layers/ConvertFp32ToFp16Layer.hpp" +#include +#include BOOST_AUTO_TEST_SUITE(IsLayerSupported) diff --git a/src/backends/test/LayerReleaseConstantDataTest.cpp b/src/backends/test/LayerReleaseConstantDataTest.cpp index 7566c72352..7549dfd5f8 100644 --- a/src/backends/test/LayerReleaseConstantDataTest.cpp +++ b/src/backends/test/LayerReleaseConstantDataTest.cpp @@ -6,13 +6,13 @@ #include #include -#include "backends/WorkloadData.hpp" -#include "Graph.hpp" +#include +#include #include -#include "backends/CpuTensorHandle.hpp" -#include "backends/ClWorkloadFactory.hpp" +#include +#include using namespace armnn; using namespace std; diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp index 4f6cb93670..55f4a5c5f9 100644 --- a/src/backends/test/LayerTests.cpp +++ b/src/backends/test/LayerTests.cpp @@ -17,7 +17,7 @@ #include #ifdef ARMCOMPUTECL_ENABLED -#include +#include #include #endif diff --git a/src/backends/test/LstmTestImpl.hpp b/src/backends/test/LstmTestImpl.hpp index 2c4e166084..7d57c86a67 100644 --- a/src/backends/test/LstmTestImpl.hpp +++ b/src/backends/test/LstmTestImpl.hpp @@ -8,12 +8,12 @@ #include #include -#include "test/TensorHelpers.hpp" +#include #include "QuantizeHelper.hpp" -#include "backends/CpuTensorHandle.hpp" +#include #include -#include "backends/WorkloadFactory.hpp" +#include LayerTestResult LstmNoCifgNoPeepholeNoProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory, const boost::multi_array& input, @@ -283,7 +283,7 @@ LstmLayerFloat32NoCifgWithPeepholeWithProjectionTestImpl(armnn::IWorkloadFactory AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get()); AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get()); AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get()); - + AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get()); AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get()); AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get()); diff --git a/src/backends/test/MemCopyTests.cpp b/src/backends/test/MemCopyTests.cpp index a1dc34a6ec..f66caffd92 100644 --- a/src/backends/test/MemCopyTests.cpp +++ b/src/backends/test/MemCopyTests.cpp @@ -8,13 +8,13 @@ #include #include #if 
-#include
+#include
 #endif
 #if ARMCOMPUTENEON_ENABLED
 #include
 #endif
 
 #include
-#include "test/TensorHelpers.hpp"
+#include <test/TensorHelpers.hpp>
 
 #include "TensorCopyUtils.hpp"
 #include "WorkloadTestUtils.hpp"
diff --git a/src/backends/test/NormTestImpl.hpp b/src/backends/test/NormTestImpl.hpp
index 2690313655..dfa2f7072e 100644
--- a/src/backends/test/NormTestImpl.hpp
+++ b/src/backends/test/NormTestImpl.hpp
@@ -3,11 +3,11 @@
 // SPDX-License-Identifier: MIT
 //
 
-#include "armnn/Exceptions.hpp"
-#include "armnn/LayerSupport.hpp"
+#include <armnn/Exceptions.hpp>
+#include <armnn/LayerSupport.hpp>
 
-#include "backends/CpuTensorHandle.hpp"
-#include "backends/WorkloadFactory.hpp"
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/WorkloadFactory.hpp>
 
 LayerTestResult SimpleNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                             armnn::NormalizationAlgorithmChannel normChannel,
diff --git a/src/backends/test/PermuteTestImpl.hpp b/src/backends/test/PermuteTestImpl.hpp
index b49c539b2e..2caf2c8d2b 100644
--- a/src/backends/test/PermuteTestImpl.hpp
+++ b/src/backends/test/PermuteTestImpl.hpp
@@ -9,11 +9,11 @@
 #include
 #include
 
-#include "test/TensorHelpers.hpp"
+#include <test/TensorHelpers.hpp>
 #include "QuantizeHelper.hpp"
 
-#include "backends/CpuTensorHandle.hpp"
-#include "backends/WorkloadFactory.hpp"
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/WorkloadFactory.hpp>
 
 template
 LayerTestResult SimplePermuteTestImpl(
diff --git a/src/backends/test/Pooling2dTestImpl.hpp b/src/backends/test/Pooling2dTestImpl.hpp
index e8c7e86e9d..4c69fb9f46 100644
--- a/src/backends/test/Pooling2dTestImpl.hpp
+++ b/src/backends/test/Pooling2dTestImpl.hpp
@@ -9,11 +9,11 @@
 #include
 #include
 
-#include "test/TensorHelpers.hpp"
+#include <test/TensorHelpers.hpp>
 #include "QuantizeHelper.hpp"
 
-#include "backends/CpuTensorHandle.hpp"
-#include "backends/WorkloadFactory.hpp"
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/WorkloadFactory.hpp>
 
 #include
diff --git a/src/backends/test/ReshapeTestImpl.hpp b/src/backends/test/ReshapeTestImpl.hpp
index 5d32d9d3a6..cbd3b58798 100644
--- a/src/backends/test/ReshapeTestImpl.hpp
+++ b/src/backends/test/ReshapeTestImpl.hpp
@@ -9,11 +9,11 @@
 #include
 #include
 
-#include "test/TensorHelpers.hpp"
+#include <test/TensorHelpers.hpp>
 #include "QuantizeHelper.hpp"
 
-#include "backends/CpuTensorHandle.hpp"
-#include "backends/WorkloadFactory.hpp"
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/WorkloadFactory.hpp>
 
 template
 LayerTestResult SimpleReshapeTestImpl(
diff --git a/src/backends/test/SoftmaxTestImpl.hpp b/src/backends/test/SoftmaxTestImpl.hpp
index 5bc13fa21c..7ca5f70e85 100644
--- a/src/backends/test/SoftmaxTestImpl.hpp
+++ b/src/backends/test/SoftmaxTestImpl.hpp
@@ -9,11 +9,11 @@
 #include
 #include
 
-#include "test/TensorHelpers.hpp"
+#include <test/TensorHelpers.hpp>
 #include "QuantizeHelper.hpp"
 
-#include "backends/CpuTensorHandle.hpp"
-#include "backends/WorkloadFactory.hpp"
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/WorkloadFactory.hpp>
 
 #include
diff --git a/src/backends/test/SplitterTestImpl.hpp b/src/backends/test/SplitterTestImpl.hpp
index 5dcc412d0e..4578ce5792 100644
--- a/src/backends/test/SplitterTestImpl.hpp
+++ b/src/backends/test/SplitterTestImpl.hpp
@@ -8,12 +8,12 @@
 #include
 #include
 
-#include "test/TensorHelpers.hpp"
+#include <test/TensorHelpers.hpp>
 
-#include "backends/CpuTensorHandle.hpp"
-#include "backends/WorkloadFactory.hpp"
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/WorkloadFactory.hpp>
 
-#include "backends/test/QuantizeHelper.hpp"
+#include <backends/test/QuantizeHelper.hpp>
 
 template
diff --git a/src/backends/test/TensorCopyUtils.cpp b/src/backends/test/TensorCopyUtils.cpp
index b2a9efc01e..e92469aaa3 100644
--- a/src/backends/test/TensorCopyUtils.cpp
+++ b/src/backends/test/TensorCopyUtils.cpp
@@ -11,7 +11,7 @@
 #include "TensorCopyUtils.hpp"
 
 #ifdef ARMCOMPUTECL_ENABLED
-#include
+#include
 #endif
 
 #if ARMCOMPUTENEON_ENABLED
@@ -22,7 +22,7 @@
 #include
 #endif
 
-#include "backends/CpuTensorHandle.hpp"
+#include <backends/CpuTensorHandle.hpp>
 
 void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem)
 {
diff --git a/src/backends/test/TensorCopyUtils.hpp b/src/backends/test/TensorCopyUtils.hpp
index 0cec839903..4a3ba64239 100644
--- a/src/backends/test/TensorCopyUtils.hpp
+++ b/src/backends/test/TensorCopyUtils.hpp
@@ -4,8 +4,8 @@
 //
 #pragma once
 
-#include "armnn/Tensor.hpp"
-#include "backends/ITensorHandle.hpp"
+#include <armnn/Tensor.hpp>
+#include <backends/ITensorHandle.hpp>
 
 void CopyDataToITensorHandle(armnn::ITensorHandle* tensorHandle, const void* mem);
-- 
cgit v1.2.1