diff options
Diffstat (limited to 'src/backends')
35 files changed, 777 insertions, 49 deletions
diff --git a/src/backends/aclCommon/test/MemCopyTestImpl.hpp b/src/backends/aclCommon/test/MemCopyTestImpl.hpp index 4247cc5ef4..4e0bfa85de 100644 --- a/src/backends/aclCommon/test/MemCopyTestImpl.hpp +++ b/src/backends/aclCommon/test/MemCopyTestImpl.hpp @@ -4,6 +4,8 @@ // #pragma once +#include <TypeUtils.hpp> + #include <backendsCommon/IBackendInternal.hpp> #include <backendsCommon/test/LayerTests.hpp> @@ -18,33 +20,29 @@ namespace { -LayerTestResult<float, 4> MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, - armnn::IWorkloadFactory& dstWorkloadFactory, - bool withSubtensors) +template<armnn::DataType dataType, typename T = armnn::ResolveType<dataType>> +LayerTestResult<T, 4> MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, + armnn::IWorkloadFactory& dstWorkloadFactory, + bool withSubtensors) { const std::array<unsigned int, 4> shapeData = { { 1u, 1u, 6u, 5u } }; const armnn::TensorShape tensorShape(4, shapeData.data()); - const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32); - boost::multi_array<float, 4> inputData = MakeTensor<float, 4>(tensorInfo, std::vector<float>( + const armnn::TensorInfo tensorInfo(tensorShape, dataType); + boost::multi_array<T, 4> inputData = MakeTensor<T, 4>(tensorInfo, std::vector<T>( { - 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, - - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, + 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, }) ); - LayerTestResult<float, 4> ret(tensorInfo); + LayerTestResult<T, 4> ret(tensorInfo); ret.outputExpected = inputData; - boost::multi_array<float, 4> outputData(shapeData); + boost::multi_array<T, 4> outputData(shapeData); auto inputTensorHandle = srcWorkloadFactory.CreateTensorHandle(tensorInfo); auto outputTensorHandle = dstWorkloadFactory.CreateTensorHandle(tensorInfo); @@ -75,8 +73,11 @@ LayerTestResult<float, 4> MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactor return ret; } -template<typename SrcWorkloadFactory, typename DstWorkloadFactory> -LayerTestResult<float, 4> MemCopyTest(bool withSubtensors) +template<typename SrcWorkloadFactory, + typename DstWorkloadFactory, + armnn::DataType dataType, + typename T = armnn::ResolveType<dataType>> +LayerTestResult<T, 4> MemCopyTest(bool withSubtensors) { armnn::IBackendInternal::IMemoryManagerSharedPtr srcMemoryManager = WorkloadFactoryHelper<SrcWorkloadFactory>::GetMemoryManager(); @@ -87,7 +88,7 @@ LayerTestResult<float, 4> MemCopyTest(bool withSubtensors) SrcWorkloadFactory srcWorkloadFactory = WorkloadFactoryHelper<SrcWorkloadFactory>::GetFactory(srcMemoryManager); DstWorkloadFactory dstWorkloadFactory = WorkloadFactoryHelper<DstWorkloadFactory>::GetFactory(dstMemoryManager); - return MemCopyTest(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); + return MemCopyTest<dataType>(srcWorkloadFactory, dstWorkloadFactory, withSubtensors); } } // anonymous namespace diff --git a/src/backends/aclCommon/test/MemCopyTests.cpp b/src/backends/aclCommon/test/MemCopyTests.cpp index 7099a70bc7..78cd95b21d 100644 --- a/src/backends/aclCommon/test/MemCopyTests.cpp +++ b/src/backends/aclCommon/test/MemCopyTests.cpp @@ -47,25 +47,29 @@ BOOST_FIXTURE_TEST_SUITE(MemCopyClNeon, ClContextControlFixture) BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpu) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(false); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(false); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeon) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(false); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(false); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndGpuWithSubtensors) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory>(true); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::NeonWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(true); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndNeonWithSubtensors) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory>(true); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::ClWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(true); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } diff --git a/src/backends/backends.cmake b/src/backends/backends.cmake index c82de8d8e8..438fda3664 100644 --- a/src/backends/backends.cmake +++ b/src/backends/backends.cmake @@ -5,8 +5,8 @@ # single place to use wildcards, so we can include # yet unknown backend modules and corresponding common libraries -FILE(GLOB commonIncludes ${PROJECT_SOURCE_DIR}/src/backends/*/common.cmake) -FILE(GLOB backendIncludes ${PROJECT_SOURCE_DIR}/src/backends/*/backend.cmake) +file(GLOB commonIncludes ${PROJECT_SOURCE_DIR}/src/backends/*/common.cmake) +file(GLOB backendIncludes ${PROJECT_SOURCE_DIR}/src/backends/*/backend.cmake) # prefer to include common code first foreach(includeFile ${commonIncludes}) diff --git a/src/backends/backendsCommon/IBackendInternal.hpp b/src/backends/backendsCommon/IBackendInternal.hpp index b102d1a1f9..2e6b056798 100644 --- a/src/backends/backendsCommon/IBackendInternal.hpp +++ b/src/backends/backendsCommon/IBackendInternal.hpp @@ -6,6 +6,10 @@ #include <armnn/Types.hpp> #include <armnn/IRuntime.hpp> + +#include <ISubGraphConverter.hpp> +#include <SubGraph.hpp> + #include <vector> namespace armnn @@ -37,6 +41,8 @@ public: using IMemoryManagerUniquePtr = std::unique_ptr<IMemoryManager>; using IMemoryManagerSharedPtr = std::shared_ptr<IMemoryManager>; + using ISubGraphConverterPtr = std::unique_ptr<ISubGraphConverter>; + virtual IMemoryManagerUniquePtr CreateMemoryManager() const = 0; virtual IWorkloadFactoryPtr CreateWorkloadFactory( @@ -44,6 +50,8 @@ public: virtual IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const = 0; + virtual ISubGraphConverterPtr CreateSubGraphConverter(const std::shared_ptr<SubGraph>& subGraph) const = 0; + virtual Optimizations GetOptimizations() const = 0; virtual ILayerSupportSharedPtr GetLayerSupport() const = 0; }; diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp index 2987e5dd2a..187d2f7d38 100644 --- a/src/backends/backendsCommon/LayerSupportBase.cpp +++ b/src/backends/backendsCommon/LayerSupportBase.cpp @@ -285,6 +285,13 @@ bool LayerSupportBase::IsPooling2dSupported(const TensorInfo& input, return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); } +bool LayerSupportBase::IsPreCompiledSupported(const TensorInfo& input, + const PreCompiledDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) const +{ + return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported); +} + bool LayerSupportBase::IsReshapeSupported(const TensorInfo& input, const ReshapeDescriptor& descriptor, Optional<std::string&> reasonIfUnsupported) const diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp index 8c7aa98043..c6f943c7e0 100644 --- a/src/backends/backendsCommon/LayerSupportBase.hpp +++ b/src/backends/backendsCommon/LayerSupportBase.hpp @@ -184,6 +184,10 @@ public: const Pooling2dDescriptor& descriptor, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + bool IsPreCompiledSupported(const TensorInfo& input, + const PreCompiledDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + bool IsReshapeSupported(const TensorInfo& input, const ReshapeDescriptor& descriptor, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp index a5db088be7..97981e2b8d 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -1051,4 +1051,9 @@ void RsqrtQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const "output"); } +void PreCompiledQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + // This is internally generated so it should not need validation. +} + } //namespace armnn diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp index 59e3dfbf5c..453896b912 100644 --- a/src/backends/backendsCommon/WorkloadData.hpp +++ b/src/backends/backendsCommon/WorkloadData.hpp @@ -378,4 +378,16 @@ struct RsqrtQueueDescriptor : QueueDescriptor void Validate(const WorkloadInfo& workloadInfo) const; }; +struct PreCompiledQueueDescriptor : QueueDescriptorWithParameters<PreCompiledDescriptor> +{ + PreCompiledQueueDescriptor() + : m_PreCompiledObject(nullptr) + { + } + + std::shared_ptr<void> m_PreCompiledObject; + + void Validate(const WorkloadInfo& workloadInfo) const; +}; + } //namespace armnn diff --git a/src/backends/backendsCommon/WorkloadDataFwd.hpp b/src/backends/backendsCommon/WorkloadDataFwd.hpp index 9ae20e0ce1..9fbd81b326 100644 --- a/src/backends/backendsCommon/WorkloadDataFwd.hpp +++ b/src/backends/backendsCommon/WorkloadDataFwd.hpp @@ -23,5 +23,6 @@ struct MultiplicationQueueDescriptor; struct BatchNormalizationQueueDescriptor; struct FakeQuantizationQueueDescriptor; struct ReshapeQueueDescriptor; +struct PreCompiledQueueDescriptor; -} // namespace armnn
\ No newline at end of file +} // namespace armnn diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp index 209ba6a4ed..0f015bd540 100644 --- a/src/backends/backendsCommon/WorkloadFactory.cpp +++ b/src/backends/backendsCommon/WorkloadFactory.cpp @@ -193,14 +193,13 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, case LayerType::Debug: { auto cLayer = boost::polymorphic_downcast<const DebugLayer*>(&layer); - const DebugDescriptor& descriptor = cLayer->GetParameters(); const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); result = layerSupportObject->IsDebugSupported(OverrideDataType(input, dataType), OverrideDataType(output, dataType), - descriptor, + cLayer->GetParameters(), reason); break; } @@ -577,6 +576,15 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId, reason); break; } + case LayerType::PreCompiled: + { + auto cLayer = boost::polymorphic_downcast<const PreCompiledLayer*>(&layer); + const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + result = layerSupportObject->IsPreCompiledSupported(OverrideDataType(input, dataType), + cLayer->GetParameters(), + reason); + break; + } case LayerType::Division: { const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp index aee9f91b56..d516698d3f 100644 --- a/src/backends/backendsCommon/WorkloadFactory.hpp +++ b/src/backends/backendsCommon/WorkloadFactory.hpp @@ -159,6 +159,9 @@ public: virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info) const = 0; + + virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, + const WorkloadInfo& info) const = 0; }; } //namespace armnn diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt index 8107176210..7edd93e1d6 100644 --- a/src/backends/backendsCommon/test/CMakeLists.txt +++ b/src/backends/backendsCommon/test/CMakeLists.txt @@ -28,6 +28,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources OptimizedNetworkTests.cpp PermuteTestImpl.hpp Pooling2dTestImpl.hpp + PreCompiledTestImpl.cpp + PreCompiledTestImpl.hpp QuantizeHelper.hpp ReshapeTestImpl.hpp RuntimeTestImpl.hpp diff --git a/src/backends/backendsCommon/test/DebugTestImpl.hpp b/src/backends/backendsCommon/test/DebugTestImpl.hpp index e0f8a35d0a..d112054198 100644 --- a/src/backends/backendsCommon/test/DebugTestImpl.hpp +++ b/src/backends/backendsCommon/test/DebugTestImpl.hpp @@ -64,15 +64,15 @@ LayerTestResult<T, Dim> DebugTestImpl( outputHandle->Allocate(); CopyDataToITensorHandle(inputHandle.get(), input.data()); - + std::ostringstream oss; std::streambuf* coutStreambuf = std::cout.rdbuf(); std::cout.rdbuf(oss.rdbuf()); ExecuteWorkload(*workload, memoryManager); - + std::cout.rdbuf(coutStreambuf); - + BOOST_TEST(oss.str() == expectedStringOutput); CopyDataFromITensorHandle(ret.output.data(), outputHandle.get()); diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp index 78716efaaf..edc58cf514 100644 --- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp +++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp @@ -372,6 +372,8 @@ DECLARE_LAYER_POLICY_2_PARAM(Permute) DECLARE_LAYER_POLICY_2_PARAM(Pooling2d) +DECLARE_LAYER_POLICY_2_PARAM(PreCompiled) + DECLARE_LAYER_POLICY_1_PARAM(Division) DECLARE_LAYER_POLICY_2_PARAM(ResizeBilinear) diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp index 8e4596b703..0bf56e2445 100755..100644 --- a/src/backends/backendsCommon/test/LayerTests.cpp +++ b/src/backends/backendsCommon/test/LayerTests.cpp @@ -37,6 +37,7 @@ #include "StridedSliceTestImpl.hpp" #include "NormTestImpl.hpp" #include "PermuteTestImpl.hpp" +#include "PreCompiledTestImpl.hpp" #include "LstmTestImpl.hpp" #include "ConvertFp16ToFp32TestImpl.hpp" #include "ConvertFp32ToFp16TestImpl.hpp" @@ -8567,3 +8568,38 @@ LayerTestResult<uint8_t, 1> Debug1DUint8Test( { return Debug1DTest<uint8_t>(workloadFactory, memoryManager); } + +LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + return PreCompiledConvolution2dTestImpl(workloadFactory, memoryManager); +} + +LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + return PreCompiledConvolution2dStride2x2TestImpl(workloadFactory, memoryManager); +} + +LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTest( + armnn::IWorkloadFactory & workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager) +{ + return PreCompiledDepthwiseConvolution2dTestImpl(workloadFactory, memoryManager); +} + +LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2Test( + armnn::IWorkloadFactory & workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager) +{ + return PreCompiledDepthwiseConvolution2dStride2x2TestImpl(workloadFactory, memoryManager); +} + +LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + return PreCompiledMaxPooling2dTestImpl(workloadFactory, memoryManager); +} diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp index 98c0806ddf..744470db49 100644 --- a/src/backends/backendsCommon/test/LayerTests.hpp +++ b/src/backends/backendsCommon/test/LayerTests.hpp @@ -1317,3 +1317,55 @@ LayerTestResult<uint8_t, 2> Debug2DUint8Test( LayerTestResult<uint8_t, 1> Debug1DUint8Test( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTest( + armnn::IWorkloadFactory & workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager); + +LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2Test( + armnn::IWorkloadFactory & workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager); + +LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<float, 4> Debug4DFloat32Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<float, 3> Debug3DFloat32Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<float, 2> Debug2DFloat32Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<float, 1> Debug1DFloat32Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<uint8_t, 4> Debug4DUint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<uint8_t, 3> Debug3DUint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<uint8_t, 2> Debug2DUint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<uint8_t, 1> Debug1DUint8Test( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); diff --git a/src/backends/backendsCommon/test/PreCompiledTestImpl.cpp b/src/backends/backendsCommon/test/PreCompiledTestImpl.cpp new file mode 100644 index 0000000000..5a2bba1375 --- /dev/null +++ b/src/backends/backendsCommon/test/PreCompiledTestImpl.cpp @@ -0,0 +1,491 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "PreCompiledTestImpl.hpp" + +#include "TensorCopyUtils.hpp" + +#include <Graph.hpp> +#include <Network.hpp> +#include <Runtime.hpp> + +#include <armnn/Exceptions.hpp> +#include <armnn/INetwork.hpp> + +#include <test/TensorHelpers.hpp> + +#include <backendsCommon/WorkloadFactory.hpp> + +#include <boost/polymorphic_pointer_cast.hpp> + +using namespace armnn; + +namespace +{ + +template<typename ConvolutionDescriptor> +struct PreCompiledConvolutionHelper +{ +}; + +template<> +struct PreCompiledConvolutionHelper<Convolution2dDescriptor> +{ + static IConnectableLayer* AddConvolutionLayerToNetwork( + Network& network, + Convolution2dDescriptor descriptor, + const ConstTensor& weights, + const ConstTensor& biases) + { + return network.AddConvolution2dLayer(descriptor, weights, biases, "convolution"); + } +}; + +template<> +struct PreCompiledConvolutionHelper<DepthwiseConvolution2dDescriptor> +{ + static IConnectableLayer* AddConvolutionLayerToNetwork( + Network& network, + DepthwiseConvolution2dDescriptor descriptor, + const ConstTensor& weights, + const ConstTensor& biases) + { + return network.AddDepthwiseConvolution2dLayer(descriptor, weights, biases, "depthwiseConvolution"); + } +}; + +template<typename ConvolutionDescriptor> +ConvolutionDescriptor CreateConvolutionDescriptor(unsigned int stride, unsigned int padding) +{ + ConvolutionDescriptor descriptor; + + descriptor.m_StrideX = stride; + descriptor.m_StrideY = stride; + descriptor.m_PadLeft = padding; + descriptor.m_PadRight = padding; + descriptor.m_PadTop = padding; + descriptor.m_PadBottom = padding; + descriptor.m_BiasEnabled = true; + descriptor.m_DataLayout = DataLayout::NHWC; + + return descriptor; +} + +static std::vector<uint8_t> CreateIdentityConvolutionKernel( + unsigned int kernelSize, unsigned int channels) +{ + BOOST_ASSERT(kernelSize % 2 == 1); // kernelSize need to be an odd number + + const unsigned int numElements = channels * (kernelSize * kernelSize); + std::vector<uint8_t> kernel(numElements, 0u); + + unsigned int centerIndex = kernelSize / 2; + for(unsigned int y = 0u; y < kernelSize; y++) + { + for(unsigned int x = 0u; x < kernelSize; x++) + { + for(unsigned int channel = 0u; channel < channels; channel++) + { + if (x == centerIndex && y == centerIndex) + { + const unsigned int flatIndex = + (y * kernelSize * channels) + (x * channels) + channel; + + kernel[flatIndex] = 1u; + } + } + } + } + + return kernel; +} + +template<typename ConvolutionDescriptor> +std::vector<uint8_t> GetIdentityConvolutionExpectedOutputData( + const TensorInfo& inputInfo, + const TensorInfo& outputInfo, + const ConvolutionDescriptor& descriptor, + const std::vector<uint8_t>& inputData) +{ + const unsigned int outputDataSize = outputInfo.GetNumElements(); + std::vector<uint8_t> expectedOutputData(outputDataSize); + + const unsigned int channels = outputInfo.GetShape()[3]; + BOOST_ASSERT(channels == inputInfo.GetShape()[3]); + + const unsigned int inputW = inputInfo.GetShape()[2]; + + const unsigned int outputH = outputInfo.GetShape()[1]; + const unsigned int outputW = outputInfo.GetShape()[2]; + + // Pick values from the input buffer, but after each iteration skip a number of + // rows and columns equal to the stride in the respective dimension + for (unsigned int inputY = 0, outputY = 0; outputY < outputH; inputY += descriptor.m_StrideY, outputY++) + { + for (unsigned int inputX = 0, outputX = 0; outputX < outputW; inputX += descriptor.m_StrideX, outputX++) + { + for (unsigned int channel = 0u; channel < channels; channel++) + { + const unsigned int inputIndex = + (inputY * inputW * channels) + (inputX * channels) + channel; + const unsigned int outputIndex = + (outputY * outputW * channels) + (outputX * channels) + channel; + + expectedOutputData[outputIndex] = inputData[inputIndex]; + } + } + } + + return expectedOutputData; +} + +armnn::PreCompiledLayer* FindPreCompiledLayer(armnn::Graph& optimisedGraph) +{ + for (auto& layer : optimisedGraph) + { + if (layer->GetType() == armnn::LayerType::PreCompiled) + { + return boost::polymorphic_pointer_downcast<armnn::PreCompiledLayer>(layer); + } + } + + // No pre-compiled layer found + return nullptr; +} + +// NOTE: This only supports a single input and a single output +LayerTestResult<uint8_t, 4> OptimiseAndRunNetwork(armnn::IWorkloadFactory& workloadFactory, + Network& net, + TensorInfo inputInfo, + std::vector<uint8_t> inputData, + TensorInfo outputInfo, + std::vector<uint8_t> expectedOutputData) +{ + // Optimize the network for the backend supported by the factory + std::vector<BackendId> backends = {workloadFactory.GetBackendId()}; + IRuntimePtr runtime(IRuntime::Create(IRuntime::CreationOptions())); + IOptimizedNetworkPtr optimizedNet = Optimize(net, backends, runtime->GetDeviceSpec(), OptimizerOptions()); + if (!optimizedNet) + { + throw RuntimeException(std::string("Failed to optimize network for ") + std::string(backends[0]), + CHECK_LOCATION()); + } + + // Find the pre-compiled layer in the optimised graph + Graph& optimisedGraph = static_cast<OptimizedNetwork*>(optimizedNet.get())->GetGraph(); + PreCompiledLayer* preCompiledLayer = FindPreCompiledLayer(optimisedGraph); + if (!preCompiledLayer) + { + throw RuntimeException("Could not find pre-compiled layer in optimised graph", CHECK_LOCATION()); + } + + // Create the tensor handles + for (auto&& layer : optimisedGraph.TopologicalSort()) + { + layer->CreateTensorHandles(optimisedGraph, workloadFactory); + } + + // Create the pre-compiled workload + auto workload = preCompiledLayer->CreateWorkload(optimisedGraph, workloadFactory); + + // Set the input data + boost::multi_array<uint8_t, 4> input = MakeTensor<uint8_t, 4>(inputInfo, inputData); + const QueueDescriptor& workloadData = + static_cast<BaseWorkload<PreCompiledQueueDescriptor>*>(workload.get())->GetData(); + CopyDataToITensorHandle(workloadData.m_Inputs[0], &input[0][0][0][0]); + + // Execute the workload + workload->Execute(); + + // Set the expected and actual outputs + LayerTestResult<uint8_t, 4> result(outputInfo); + result.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData); + CopyDataFromITensorHandle(&result.output[0][0][0][0], workloadData.m_Outputs[0]); + return result; +} + +} // anonymous namespace + +template<typename ConvolutionDescriptor> +LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + unsigned int inputSize, + unsigned int outputSize, + unsigned int channels, + unsigned int kernelSize, + const ConvolutionDescriptor& descriptor, + bool isDepthwiseConvolution = false) +{ + BOOST_ASSERT(descriptor.m_BiasEnabled == true); + BOOST_ASSERT(descriptor.m_DataLayout == DataLayout::NHWC); + + // Set up tensor shapes and infos + const TensorShape inputShape ({1, inputSize, inputSize, channels}); + const TensorShape outputShape({1, outputSize, outputSize, channels}); + const TensorShape kernelShape = isDepthwiseConvolution + // The format for the depthwise convolution is MIHW + ? TensorShape({1, channels, kernelSize, kernelSize}) + // The format for the regular convolution depends on the layout of the inputs, + // in this case is NHWC + : TensorShape({1, kernelSize, kernelSize, channels}); + const TensorShape biasesShape({1, 1, 1, channels}); + + // NOTE: inputScale * weightsScale / outputScale must be >= 0.0 and < 1.0 + TensorInfo inputInfo(inputShape, DataType::QuantisedAsymm8, 1.0f, 0); + TensorInfo outputInfo(outputShape, DataType::QuantisedAsymm8, 2.0f, 0); + TensorInfo weightsInfo(kernelShape, DataType::QuantisedAsymm8, 1.0f, 0); + TensorInfo biasesInfo(biasesShape, DataType::Signed32, 1.0f, 0); + + // Populate weight and bias data + std::vector<uint8_t> weightsData = CreateIdentityConvolutionKernel(kernelSize, channels); + + // NOTE: We need to multiply the elements of the identity kernel by 2 + // to compensate for the scaling factor + std::transform(weightsData.begin(), weightsData.end(), weightsData.begin(), + [](uint8_t w) -> uint8_t { return static_cast<uint8_t>(w * 2); }); + + const unsigned int biasDataSize = biasesInfo.GetNumElements(); + std::vector<int32_t> biasesData(biasDataSize, 0); + + // Construct network + Network network; + ConstTensor weights(weightsInfo, weightsData); + ConstTensor biases(biasesInfo, biasesData); + + IConnectableLayer* const inputLayer = network.AddInputLayer(0, "input"); + + IConnectableLayer* const convolutionLayer = + PreCompiledConvolutionHelper<ConvolutionDescriptor> + ::AddConvolutionLayerToNetwork(network, descriptor, weights, biases); + + IConnectableLayer* const outputLayer = network.AddOutputLayer(0, "output"); + + inputLayer->GetOutputSlot(0).Connect(convolutionLayer->GetInputSlot(0)); + inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); + + convolutionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + convolutionLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); + + // Generate input data: sequence [0, 1 .. 255] + const unsigned int inputDataSize = inputInfo.GetNumElements(); + std::vector<uint8_t> inputData(inputDataSize); + std::iota(inputData.begin(), inputData.end(), 0); + + // Set expected output + std::vector<uint8_t> expectedOutputData = + GetIdentityConvolutionExpectedOutputData(inputInfo, + outputInfo, + descriptor, + inputData); + + return OptimiseAndRunNetwork(workloadFactory, + network, + inputInfo, + inputData, + outputInfo, + expectedOutputData); +} + +LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + const unsigned int inputSize = 16; + const unsigned int outputSize = 16; + const unsigned int channels = 1; + const unsigned int kernelSize = 3; + const unsigned int stride = 1; + const unsigned int padding = 1; + + Convolution2dDescriptor descriptor = + CreateConvolutionDescriptor<Convolution2dDescriptor>(stride, padding); + + return PreCompiledConvolution2dTestImpl(workloadFactory, + memoryManager, + inputSize, + outputSize, + channels, + kernelSize, + descriptor); +} + +LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2TestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + const unsigned int inputSize = 16; + const unsigned int outputSize = 8; + const unsigned int channels = 1; + const unsigned int kernelSize = 3; + const unsigned int stride = 2; + const unsigned int padding = 1; + + Convolution2dDescriptor descriptor = + CreateConvolutionDescriptor<Convolution2dDescriptor>(stride, padding); + + return PreCompiledConvolution2dTestImpl(workloadFactory, + memoryManager, + inputSize, + outputSize, + channels, + kernelSize, + descriptor); +} + +LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTestImpl( + armnn::IWorkloadFactory & workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager) +{ + const unsigned int inputSize = 16; + const unsigned int outputSize = 16; + const unsigned int channels = 3; + const unsigned int kernelSize = 1; + const unsigned int stride = 1; + const unsigned int padding = 0; + + DepthwiseConvolution2dDescriptor descriptor = + CreateConvolutionDescriptor<DepthwiseConvolution2dDescriptor>(stride, padding); + + return PreCompiledConvolution2dTestImpl(workloadFactory, + memoryManager, + inputSize, + outputSize, + channels, + kernelSize, + descriptor, + true); +} + +LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2TestImpl( + armnn::IWorkloadFactory & workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager) +{ + const unsigned int inputSize = 16; + const unsigned int outputSize = 8; + const unsigned int channels = 3; + const unsigned int kernelSize = 3; + const unsigned int stride = 2; + const unsigned int padding = 1; + + DepthwiseConvolution2dDescriptor descriptor = + CreateConvolutionDescriptor<DepthwiseConvolution2dDescriptor>(stride, padding); + + return PreCompiledConvolution2dTestImpl(workloadFactory, + memoryManager, + inputSize, + outputSize, + channels, + kernelSize, + descriptor); +} + +LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) +{ + // Pooling cannot be run in isolation, it must be fused with the previous layer, e.g. Convolution2d. + + // Set up the Convolution descriptor + Convolution2dDescriptor convDescriptor; + convDescriptor.m_StrideX = 1; + convDescriptor.m_StrideY = 1; + convDescriptor.m_BiasEnabled = true; + convDescriptor.m_DataLayout = DataLayout::NHWC; + + // Set up the Convolution weights + TensorInfo weightsInfo(TensorShape({16, 1, 1, 16}), DataType::QuantisedAsymm8, 2.0f, 0); + const unsigned int weightsDataSize = weightsInfo.GetNumElements(); + std::vector<uint8_t> weightsData(weightsDataSize); + for (unsigned int i = 0; i < 16; ++i) + { + for (unsigned int j = 0; j < 16; ++j) + { + weightsData[(i * 16) + j] = i == j ? 1.0f : 0.0f; + } + } + ConstTensor weights(weightsInfo, weightsData); + + // Set up the Convolution biases + TensorInfo biasInfo(TensorShape({1, 1, 1, 16}), DataType::Signed32, 1.0f * 2.0f, 0); + const unsigned int biasDataSize = biasInfo.GetNumElements(); + std::vector<int32_t> biasData(biasDataSize, 0); + ConstTensor biases(biasInfo, biasData); + + // Set up the Convolution input + TensorInfo inputInfo(TensorShape({1, 16, 16, 16 }), DataType::QuantisedAsymm8, 1.0f, 0); + const unsigned int inputDataSize = inputInfo.GetNumElements(); + std::vector<uint8_t> inputData(inputDataSize); + for (unsigned int i = 0; i < inputDataSize; ++i) + { + inputData[i] = boost::numeric_cast<uint8_t>((i * 4) % 250); + } + + // Set up the Convolution output / Pooling input info + TensorInfo convOutputInfo(TensorShape({1, 16, 16, 16 }), DataType::QuantisedAsymm8, 4.0f, 0); + + // Set up the Pooling descriptor + Pooling2dDescriptor poolDescriptor; + poolDescriptor.m_PoolType = PoolingAlgorithm::Max; + poolDescriptor.m_PoolWidth = 2; + poolDescriptor.m_PoolHeight = 2; + poolDescriptor.m_StrideX = 2; + poolDescriptor.m_StrideY = 2; + poolDescriptor.m_PaddingMethod = PaddingMethod::Exclude; + poolDescriptor.m_DataLayout = DataLayout::NHWC; + + // Set the expected output from the Pooling layer + TensorInfo outputInfo(TensorShape({1, 8, 8, 16 }), DataType::QuantisedAsymm8, 4.0f, 0); + const unsigned int outputDataSize = outputInfo.GetNumElements(); + std::vector<uint8_t> expectedOutputData(outputDataSize); + // The Maxpooling inputs are the Convolution outputs, i.e. (Convolution inputs / 2) after scale adjustments + // Maxpooling selects the max value in each pool from its inputs and our pool size is 2x2 + for (unsigned int channel = 0; channel < 16; ++channel) + { + for (unsigned int row = 0; row < 8; ++row) + { + for (unsigned int column = 0; column < 8; ++column) + { + // The input and output data indexes are calculated for NHWC data layout. + // Output index: (row * columns * channels) + (column * channels) + channel + auto outIndex = (row * 8 * 16) + (column * 16) + channel; + // Input index: (row * strideY * columns * channels) + (column * strideX * channels) + channel + // and we take 4 entries for the 2x2 pool + auto in0Index = ((row * 2) * 16 * 16) + ((column * 2) * 16) + channel; + auto in1Index = ((row * 2) * 16 * 16) + (((column * 2) + 1) * 16) + channel; + auto in2Index = (((row * 2) + 1) * 16 * 16) + ((column * 2) * 16) + channel; + auto in3Index = (((row * 2) + 1) * 16 * 16) + (((column * 2) + 1) * 16) + channel; + // output value is the maximum of the input pool values, adjusted for the quantization scale change + auto maxIn = std::max<uint8_t>({inputData[in0Index], + inputData[in1Index], + inputData[in2Index], + inputData[in3Index]}); + expectedOutputData[outIndex] = maxIn / 2; + } + } + } + + // Construct the network + Network net; + IConnectableLayer* const inputLayer = net.AddInputLayer(0, "input"); + IConnectableLayer* const convLayer = net.AddConvolution2dLayer(convDescriptor, weights, biases, "conv"); + IConnectableLayer* const poolingLayer = net.AddPooling2dLayer(poolDescriptor, "pooling2d"); + IConnectableLayer* const outputLayer = net.AddOutputLayer(0, "output"); + + // Connect the layers + inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); + inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); + convLayer->GetOutputSlot(0).Connect(poolingLayer->GetInputSlot(0)); + convLayer->GetOutputSlot(0).SetTensorInfo(convOutputInfo); + poolingLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + poolingLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); + + return OptimiseAndRunNetwork(workloadFactory, + net, + inputInfo, + inputData, + outputInfo, + expectedOutputData); +} diff --git a/src/backends/backendsCommon/test/PreCompiledTestImpl.hpp b/src/backends/backendsCommon/test/PreCompiledTestImpl.hpp new file mode 100644 index 0000000000..f4e78b6002 --- /dev/null +++ b/src/backends/backendsCommon/test/PreCompiledTestImpl.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "LayerTests.hpp" + +LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2TestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); + +LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTestImpl( + armnn::IWorkloadFactory & workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager); + +LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2TestImpl( + armnn::IWorkloadFactory & workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager); + +LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTestImpl( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager); diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index 2b82c185f0..2f9dfa9755 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -63,6 +63,12 @@ ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const return IBackendContextPtr{new ClBackendContext{options}}; } +IBackendInternal::ISubGraphConverterPtr ClBackend::CreateSubGraphConverter( + const std::shared_ptr<SubGraph>& subGraph) const +{ + return ISubGraphConverterPtr{}; +} + IBackendInternal::Optimizations ClBackend::GetOptimizations() const { return Optimizations{}; diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp index ef98da08a4..84b5b9a9d2 100644 --- a/src/backends/cl/ClBackend.hpp +++ b/src/backends/cl/ClBackend.hpp @@ -25,6 +25,9 @@ public: IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; + IBackendInternal::ISubGraphConverterPtr CreateSubGraphConverter( + const std::shared_ptr<SubGraph>& subGraph) const override; + IBackendInternal::Optimizations GetOptimizations() const override; IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override; }; diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index af47f65d29..28011cfd7b 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -356,4 +356,10 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRsqrt(const RsqrtQueueDescri return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); } +std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); +} + } // namespace armnn diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp index 85cbd91e11..286e897472 100644 --- a/src/backends/cl/ClWorkloadFactory.hpp +++ b/src/backends/cl/ClWorkloadFactory.hpp @@ -150,6 +150,9 @@ public: virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + private: template<typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args> static std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, diff --git a/src/backends/cl/test/ClMemCopyTests.cpp b/src/backends/cl/test/ClMemCopyTests.cpp index 93d8dd5662..3cd9af7910 100644 --- a/src/backends/cl/test/ClMemCopyTests.cpp +++ b/src/backends/cl/test/ClMemCopyTests.cpp @@ -17,25 +17,29 @@ BOOST_AUTO_TEST_SUITE(ClMemCopy) BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpu) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(false); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(false); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpu) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(false); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(false); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndGpuWithSubtensors) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory>(true); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::RefWorkloadFactory, armnn::ClWorkloadFactory, armnn::DataType::Float32>(true); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } BOOST_AUTO_TEST_CASE(CopyBetweenGpuAndCpuWithSubtensors) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory>(true); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::ClWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(true); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp index fd2b766500..ce97a1d03c 100644 --- a/src/backends/neon/NeonBackend.cpp +++ b/src/backends/neon/NeonBackend.cpp @@ -63,6 +63,12 @@ IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRu return IBackendContextPtr{}; } +IBackendInternal::ISubGraphConverterPtr NeonBackend::CreateSubGraphConverter( + const std::shared_ptr<SubGraph>& subGraph) const +{ + return ISubGraphConverterPtr{}; +} + IBackendInternal::Optimizations NeonBackend::GetOptimizations() const { return Optimizations{}; diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp index 127a5a4b62..3b1d186a06 100644 --- a/src/backends/neon/NeonBackend.hpp +++ b/src/backends/neon/NeonBackend.hpp @@ -25,6 +25,9 @@ public: IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; + IBackendInternal::ISubGraphConverterPtr CreateSubGraphConverter( + const std::shared_ptr<SubGraph>& subGraph) const override; + IBackendInternal::Optimizations GetOptimizations() const override; IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override; }; diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index 65093fb593..3728c86a66 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -324,4 +324,10 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRsqrt(const RsqrtQueueDesc return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info); } +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info); +} + } // namespace armnn diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp index 9a6308871a..68317ed651 100644 --- a/src/backends/neon/NeonWorkloadFactory.hpp +++ b/src/backends/neon/NeonWorkloadFactory.hpp @@ -151,6 +151,9 @@ public: virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + private: mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager; }; diff --git a/src/backends/neon/test/NeonMemCopyTests.cpp b/src/backends/neon/test/NeonMemCopyTests.cpp index f6699a61ba..dbe1f8da3f 100644 --- a/src/backends/neon/test/NeonMemCopyTests.cpp +++ b/src/backends/neon/test/NeonMemCopyTests.cpp @@ -18,25 +18,29 @@ BOOST_AUTO_TEST_SUITE(NeonMemCopy) BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeon) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(false); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(false); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpu) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(false); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(false); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } BOOST_AUTO_TEST_CASE(CopyBetweenCpuAndNeonWithSubtensors) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory>(true); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::RefWorkloadFactory, armnn::NeonWorkloadFactory, armnn::DataType::Float32>(true); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } BOOST_AUTO_TEST_CASE(CopyBetweenNeonAndCpuWithSubtensors) { - LayerTestResult<float, 4> result = MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory>(true); + LayerTestResult<float, 4> result = + MemCopyTest<armnn::NeonWorkloadFactory, armnn::RefWorkloadFactory, armnn::DataType::Float32>(true); BOOST_TEST(CompareTensors(result.output, result.outputExpected)); } diff --git a/src/backends/reference/RefBackend.cpp b/src/backends/reference/RefBackend.cpp index 8f5e9c4d5e..e4f468c15e 100644 --- a/src/backends/reference/RefBackend.cpp +++ b/src/backends/reference/RefBackend.cpp @@ -56,6 +56,12 @@ IBackendInternal::IMemoryManagerUniquePtr RefBackend::CreateMemoryManager() cons return IMemoryManagerUniquePtr{}; } +IBackendInternal::ISubGraphConverterPtr RefBackend::CreateSubGraphConverter( + const std::shared_ptr<SubGraph>& subGraph) const +{ + return ISubGraphConverterPtr{}; +} + IBackendInternal::Optimizations RefBackend::GetOptimizations() const { return Optimizations{}; diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp index 1a0aef58c4..51366221c7 100644 --- a/src/backends/reference/RefBackend.hpp +++ b/src/backends/reference/RefBackend.hpp @@ -25,6 +25,9 @@ public: IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; + IBackendInternal::ISubGraphConverterPtr CreateSubGraphConverter( + const std::shared_ptr<SubGraph>& subGraph) const override; + IBackendInternal::Optimizations GetOptimizations() const override; IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override; }; diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp index 79293635fb..361a3f1f74 100644 --- a/src/backends/reference/RefWorkloadFactory.cpp +++ b/src/backends/reference/RefWorkloadFactory.cpp @@ -318,4 +318,10 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateRsqrt(const RsqrtQueueDescr return MakeWorkload<RefRsqrtFloat32Workload, NullWorkload>(descriptor, info); } -} // namespace armnn
\ No newline at end of file +std::unique_ptr<IWorkload> RefWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +} // namespace armnn diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp index f6707f5046..432ac72c6e 100644 --- a/src/backends/reference/RefWorkloadFactory.hpp +++ b/src/backends/reference/RefWorkloadFactory.hpp @@ -167,6 +167,10 @@ public: virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + private: template <typename F32Workload, typename U8Workload, typename QueueDescriptorType> diff --git a/src/backends/reference/workloads/Debug.cpp b/src/backends/reference/workloads/Debug.cpp index dfcbbd8e97..cc83c7b4ee 100644 --- a/src/backends/reference/workloads/Debug.cpp +++ b/src/backends/reference/workloads/Debug.cpp @@ -98,4 +98,4 @@ template void Debug<uint8_t>(const TensorInfo& inputInfo, const DebugDescriptor& descriptor, const uint8_t* inputData, uint8_t* outputData); -} //namespace armnn +} // namespace armnn diff --git a/src/backends/reference/workloads/RefDebugWorkload.cpp b/src/backends/reference/workloads/RefDebugWorkload.cpp index 17eb8fc143..d9a47c0596 100644 --- a/src/backends/reference/workloads/RefDebugWorkload.cpp +++ b/src/backends/reference/workloads/RefDebugWorkload.cpp @@ -2,11 +2,12 @@ // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // + #include "RefDebugWorkload.hpp" #include "Debug.hpp" - #include "RefWorkloadUtils.hpp" -#include "TypeUtils.hpp" + +#include <TypeUtils.hpp> namespace armnn { @@ -30,4 +31,4 @@ void RefDebugWorkload<DataType>::Execute() const template class RefDebugWorkload<DataType::Float32>; template class RefDebugWorkload<DataType::QuantisedAsymm8>; -} //namespace armnn +} // namespace armnn diff --git a/src/backends/reference/workloads/RefDebugWorkload.hpp b/src/backends/reference/workloads/RefDebugWorkload.hpp index a1231f92d3..c1a3e26ec2 100644 --- a/src/backends/reference/workloads/RefDebugWorkload.hpp +++ b/src/backends/reference/workloads/RefDebugWorkload.hpp @@ -2,12 +2,13 @@ // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // -#pragma once -#include <backendsCommon/Workload.hpp> +#pragma once #include <armnn/TypesUtils.hpp> +#include <backendsCommon/Workload.hpp> + namespace armnn { @@ -30,4 +31,4 @@ public: using RefDebugFloat32Workload = RefDebugWorkload<DataType::Float32>; using RefDebugUint8Workload = RefDebugWorkload<DataType::QuantisedAsymm8>; -} //namespace armnn +} // namespace armnn |