Diffstat (limited to 'src/armnn/backends')
-rw-r--r--  src/armnn/backends/AclBaseMemoryManager.cpp | 32
-rw-r--r--  src/armnn/backends/AclBaseMemoryManager.hpp | 46
-rw-r--r--  src/armnn/backends/ArmComputeTensorUtils.hpp | 14
-rw-r--r--  src/armnn/backends/ClContextControl.cpp | 234
-rw-r--r--  src/armnn/backends/ClContextControl.hpp | 60
-rw-r--r--  src/armnn/backends/ClLayerSupport.cpp | 18
-rw-r--r--  src/armnn/backends/ClLayerSupport.hpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloadFactory.cpp | 164
-rw-r--r--  src/armnn/backends/ClWorkloadFactory.hpp | 41
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp | 43
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp | 19
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp | 21
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp | 10
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp | 22
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp | 8
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp | 5
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp | 6
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp | 6
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp | 9
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp | 10
-rw-r--r--  src/armnn/backends/MakeWorkloadHelper.hpp | 24
-rw-r--r--  src/armnn/backends/NeonLayerSupport.cpp | 54
-rw-r--r--  src/armnn/backends/NeonLayerSupport.hpp | 2
-rw-r--r--  src/armnn/backends/NeonWorkloadFactory.cpp | 31
-rw-r--r--  src/armnn/backends/NeonWorkloadFactory.hpp | 10
-rw-r--r--  src/armnn/backends/NeonWorkloadUtils.cpp | 2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp | 36
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp | 16
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp | 8
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp | 14
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp | 10
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp | 10
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp | 5
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp | 11
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp | 4
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp | 12
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp | 3
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp | 5
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp | 7
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp | 10
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp | 9
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp | 8
-rw-r--r--  src/armnn/backends/RefLayerSupport.cpp | 5
-rw-r--r--  src/armnn/backends/RefLayerSupport.hpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloadFactory.cpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/ConvImpl.hpp | 7
-rw-r--r--  src/armnn/backends/RefWorkloads/Merger.hpp | 2
-rw-r--r--  src/armnn/backends/RefWorkloads/Splitter.hpp | 2
-rw-r--r--  src/armnn/backends/WorkloadFactory.cpp | 50
-rw-r--r--  src/armnn/backends/WorkloadFactory.hpp | 7
-rw-r--r--  src/armnn/backends/test/ArmComputeCl.cpp | 3
-rw-r--r--  src/armnn/backends/test/ArmComputeNeon.cpp | 9
-rw-r--r--  src/armnn/backends/test/Conv2dTestImpl.hpp | 129
-rw-r--r--  src/armnn/backends/test/CreateWorkloadCl.cpp | 27
-rw-r--r--  src/armnn/backends/test/CreateWorkloadNeon.cpp | 11
-rw-r--r--  src/armnn/backends/test/CreateWorkloadRef.cpp | 11
-rw-r--r--  src/armnn/backends/test/FullyConnectedTestImpl.hpp | 7
-rw-r--r--  src/armnn/backends/test/IsLayerSupportedTest.cpp | 3
-rw-r--r--  src/armnn/backends/test/LayerTests.cpp | 777
-rw-r--r--  src/armnn/backends/test/LayerTests.hpp | 3
-rw-r--r--  src/armnn/backends/test/MemCopyTests.cpp | 2
-rw-r--r--  src/armnn/backends/test/NormTestImpl.hpp | 3
-rw-r--r--  src/armnn/backends/test/Reference.cpp | 3
-rw-r--r--  src/armnn/backends/test/SplitterTestImpl.hpp | 187
66 files changed, 1557 insertions, 765 deletions
diff --git a/src/armnn/backends/AclBaseMemoryManager.cpp b/src/armnn/backends/AclBaseMemoryManager.cpp
new file mode 100644
index 0000000000..fc796995c7
--- /dev/null
+++ b/src/armnn/backends/AclBaseMemoryManager.cpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#include "AclBaseMemoryManager.hpp"
+
+namespace armnn
+{
+
+#if ARMCOMPUTENEON_ENABLED || ARMCOMPUTECL_ENABLED
+AclBaseMemoryManager::AclBaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc)
+{
+ // (re)create the memory manager components
+ m_Allocator = std::move(alloc);
+ m_IntraLayerLifetimeMgr = std::make_shared<arm_compute::BlobLifetimeManager>();
+ m_IntraLayerPoolMgr = std::make_shared<arm_compute::PoolManager>();
+ m_IntraLayerMemoryMgr = std::make_shared<arm_compute::MemoryManagerOnDemand>(m_IntraLayerLifetimeMgr,
+ m_IntraLayerPoolMgr);
+}
+
+void AclBaseMemoryManager::Finalize()
+{
+ // Set allocator that the memory manager will use
+ m_IntraLayerMemoryMgr->set_allocator(m_Allocator.get());
+ // Number of pools that the manager will create. This specifies how many layers you want to run in parallel
+ m_IntraLayerMemoryMgr->set_num_pools(1);
+ // Finalize the memory manager. (Validity checks, memory allocations, etc)
+ m_IntraLayerMemoryMgr->finalize();
+}
+#endif
+
+}
diff --git a/src/armnn/backends/AclBaseMemoryManager.hpp b/src/armnn/backends/AclBaseMemoryManager.hpp
new file mode 100644
index 0000000000..74b596fe97
--- /dev/null
+++ b/src/armnn/backends/AclBaseMemoryManager.hpp
@@ -0,0 +1,46 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "WorkloadFactory.hpp"
+
+#if ARMCOMPUTENEON_ENABLED || ARMCOMPUTECL_ENABLED
+#include "arm_compute/runtime/IAllocator.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
+
+#include <memory>
+#endif
+
+namespace armnn
+{
+
+// ARM Compute Base Memory Manager
+class AclBaseMemoryManager
+{
+public:
+
+ AclBaseMemoryManager() { }
+ virtual ~AclBaseMemoryManager() { }
+
+#if ARMCOMPUTENEON_ENABLED || ARMCOMPUTECL_ENABLED
+ AclBaseMemoryManager(std::unique_ptr<arm_compute::IAllocator> alloc);
+
+ void Finalize();
+
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& Get() { return m_IntraLayerMemoryMgr; }
+
+protected:
+
+ mutable std::unique_ptr<arm_compute::IAllocator> m_Allocator;
+ mutable std::shared_ptr<arm_compute::BlobLifetimeManager> m_IntraLayerLifetimeMgr;
+ mutable std::shared_ptr<arm_compute::PoolManager> m_IntraLayerPoolMgr;
+ mutable std::shared_ptr<arm_compute::MemoryManagerOnDemand> m_IntraLayerMemoryMgr;
+#endif
+
+};
+
+} //namespace armnn
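
The CL and NEON workload factories below each own one of these managers. A minimal usage sketch, assuming ARMCOMPUTECL_ENABLED and the CLBufferAllocator that the CL factory uses later in this patch (the function name is illustrative):

    // Sketch only: mirrors how ClWorkloadFactory wires up the manager in this patch.
    #include "AclBaseMemoryManager.hpp"
    #include <arm_compute/runtime/CL/CLBufferAllocator.h>
    #include <memory>

    void ExampleMemoryManagerLifetime()
    {
        armnn::AclBaseMemoryManager memoryManager(
            std::make_unique<arm_compute::CLBufferAllocator>());

        // Workloads are constructed with memoryManager.Get(), i.e. the shared
        // arm_compute::MemoryManagerOnDemand, so they can share intra-layer buffers.

        // Once every workload has been created, allocate the pools.
        memoryManager.Finalize();
    }
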
diff --git a/src/armnn/backends/ArmComputeTensorUtils.hpp b/src/armnn/backends/ArmComputeTensorUtils.hpp
index 9a13caf495..84547f9c80 100644
--- a/src/armnn/backends/ArmComputeTensorUtils.hpp
+++ b/src/armnn/backends/ArmComputeTensorUtils.hpp
@@ -9,6 +9,7 @@
#include <arm_compute/core/ITensor.h>
#include <arm_compute/core/TensorInfo.h>
+#include <arm_compute/core/Types.h>
#include <boost/cast.hpp>
@@ -38,6 +39,19 @@ arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const
/// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector
arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector);
+/// Utility function used to setup an arm_compute::PadStrideInfo object from an armnn layer descriptor
+template <typename Descriptor>
+arm_compute::PadStrideInfo BuildArmComputePadStrideInfo(const Descriptor &descriptor)
+{
+ return arm_compute::PadStrideInfo(descriptor.m_StrideX,
+ descriptor.m_StrideY,
+ descriptor.m_PadLeft,
+ descriptor.m_PadRight,
+ descriptor.m_PadTop,
+ descriptor.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+}
+
/// Sets up the given ArmCompute tensor's dimensions based on the given ArmNN tensor.
template <typename Tensor>
void BuildArmComputeTensor(Tensor& tensor, const armnn::TensorInfo& tensorInfo)
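
The new BuildArmComputePadStrideInfo helper is shared by the CL and NEON convolution validate functions added below. A small illustration of the mapping, assuming the armcomputetensorutils namespace that those workload sources import and using the field names of armnn::Convolution2dDescriptor:

    // Illustration only: stride 2, symmetric padding of 1.
    armnn::Convolution2dDescriptor desc;
    desc.m_StrideX = 2;  desc.m_StrideY   = 2;
    desc.m_PadLeft = 1;  desc.m_PadRight  = 1;
    desc.m_PadTop  = 1;  desc.m_PadBottom = 1;

    // The helper always applies DimensionRoundingType::FLOOR.
    arm_compute::PadStrideInfo padStrideInfo =
        armnn::armcomputetensorutils::BuildArmComputePadStrideInfo(desc);
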
diff --git a/src/armnn/backends/ClContextControl.cpp b/src/armnn/backends/ClContextControl.cpp
new file mode 100644
index 0000000000..f086328e55
--- /dev/null
+++ b/src/armnn/backends/ClContextControl.cpp
@@ -0,0 +1,234 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClContextControl.hpp"
+
+#include "armnn/Exceptions.hpp"
+
+#ifdef ARMCOMPUTECL_ENABLED
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#endif
+
+#include <boost/assert.hpp>
+#include <boost/format.hpp>
+#include <boost/log/trivial.hpp>
+#include <boost/polymorphic_cast.hpp>
+
+#include "LeakChecking.hpp"
+
+namespace cl
+{
+class Context;
+class CommandQueue;
+class Device;
+}
+
+namespace armnn
+{
+
+ClContextControl::ClContextControl(IClTunedParameters* clTunedParameters)
+ : m_clTunedParameters(boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters))
+{
+#ifdef ARMCOMPUTECL_ENABLED
+ try
+ {
+ std::vector<cl::Platform> platforms;
+ cl::Platform::get(&platforms);
+
+ // Select default platform as the first element
+ cl::Platform::setDefault(platforms[0]);
+
+ std::vector<cl::Device> devices;
+ platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
+
+ // Select default device as the first element
+ cl::Device::setDefault(devices[0]);
+ }
+ catch (const cl::Error& clError)
+ {
+ throw ClRuntimeUnavailableException(boost::str(boost::format(
+ "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%"
+ ) % clError.what() % clError.err()));
+ }
+
+ // Remove the use of global CL context
+ cl::Context::setDefault(cl::Context{});
+ BOOST_ASSERT(cl::Context::getDefault()() == NULL);
+
+ // Remove the use of global CL command queue
+ cl::CommandQueue::setDefault(cl::CommandQueue{});
+ BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL);
+
+ // always load the OpenCL runtime
+ LoadOpenClRuntime();
+#endif
+}
+
+ClContextControl::~ClContextControl()
+{
+#ifdef ARMCOMPUTECL_ENABLED
+ // load the OpenCL runtime without the tuned parameters to free the memory used for them
+ try
+ {
+ UnloadOpenClRuntime();
+ }
+ catch (const cl::Error& clError)
+ {
+ // this should not happen, it is ignored if it does
+
+ // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an
+ // exception of type std::length_error.
+ // Using stderr instead in this context as there is no point in nesting try-catch blocks here.
+ std::cerr << "A CL error occurred unloading the runtime tuner parameters: "
+ << clError.what() << ". CL error code is: " << clError.err() << std::endl;
+ }
+#endif
+}
+
+void ClContextControl::LoadOpenClRuntime()
+{
+ DoLoadOpenClRuntime(true);
+}
+
+void ClContextControl::UnloadOpenClRuntime()
+{
+ DoLoadOpenClRuntime(false);
+}
+
+void ClContextControl::DoLoadOpenClRuntime(bool useTunedParameters)
+{
+#ifdef ARMCOMPUTECL_ENABLED
+ cl::Device device = cl::Device::getDefault();
+ cl::Context context;
+ cl::CommandQueue commandQueue;
+
+ if (arm_compute::CLScheduler::get().context()() != NULL)
+ {
+ // wait for all queued CL requests to finish before reinitialising it
+ arm_compute::CLScheduler::get().sync();
+ }
+
+ try
+ {
+ arm_compute::CLKernelLibrary::get().clear_programs_cache();
+ // initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are no
+ // context references); it is initialised again, with a proper context, later.
+ arm_compute::CLScheduler::get().init(context, commandQueue, device);
+ arm_compute::CLKernelLibrary::get().init(".", context, device);
+
+ {
+ //
+ // Here we replace the context with a new one. The leak check's scope
+ // means it does not see the disposal of the original object, but it
+ // does see the creation of the replacement context, which it would
+ // therefore flag as a memory leak. The following line prevents that
+ // from happening.
+ //
+ ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE();
+ context = cl::Context(device);
+ }
+
+ bool enableProfiling = false;
+#if ARMNN_PROFILING_ENABLED
+ enableProfiling = true;
+#endif
+ if (useTunedParameters &&
+ m_clTunedParameters && m_clTunedParameters->m_Mode == IClTunedParameters::Mode::UpdateTunedParameters)
+ {
+ enableProfiling = true; // Needed for the CLTuner to work.
+ }
+
+ if (enableProfiling)
+ {
+ // Create a new queue with profiling enabled
+ commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);
+ }
+ else
+ {
+ // Use default queue
+ commandQueue = cl::CommandQueue(context, device);
+ }
+ }
+ catch (const cl::Error& clError)
+ {
+ throw ClRuntimeUnavailableException(boost::str(boost::format(
+ "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%"
+ ) % clError.what() % clError.err()));
+ }
+
+ // Note: the first argument (the path to the CL source code) is ignored, as the kernels are embedded in the ArmCompute library.
+ arm_compute::CLKernelLibrary::get().init(".", context, device);
+
+ arm_compute::ICLTuner* tuner = nullptr;
+ if (useTunedParameters && m_clTunedParameters)
+ {
+ tuner = &m_clTunedParameters->m_Tuner;
+ }
+ arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner);
+#endif
+}
+
+void ClContextControl::ClearClCache()
+{
+ DoLoadOpenClRuntime(true);
+}
+
+armnn::IClTunedParameters* IClTunedParameters::CreateRaw(armnn::IClTunedParameters::Mode mode)
+{
+ return new ClTunedParameters(mode);
+}
+
+armnn::IClTunedParametersPtr IClTunedParameters::Create(armnn::IClTunedParameters::Mode mode)
+{
+ return IClTunedParametersPtr(CreateRaw(mode), &IClTunedParameters::Destroy);
+}
+
+void IClTunedParameters::Destroy(IClTunedParameters* params)
+{
+ delete params;
+}
+
+ClTunedParameters::ClTunedParameters(armnn::IClTunedParameters::Mode mode)
+ : m_Mode(mode)
+#ifdef ARMCOMPUTECL_ENABLED
+ , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters)
+#endif
+{
+}
+
+void ClTunedParameters::Load(const char* filename)
+{
+#ifdef ARMCOMPUTECL_ENABLED
+ try
+ {
+ m_Tuner.load_from_file(filename);
+ }
+ catch (const std::exception& e)
+ {
+ throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " +
+ e.what());
+ }
+#endif
+}
+
+void ClTunedParameters::Save(const char* filename) const
+{
+#ifdef ARMCOMPUTECL_ENABLED
+ try
+ {
+ m_Tuner.save_to_file(filename);
+ }
+ catch (const std::exception& e)
+ {
+ throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " +
+ e.what());
+ }
+#endif
+}
+
+} // namespace armnn
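
For context, the tuned-parameters machinery moved into this file is typically driven from application code roughly as follows. This is only a sketch: the actual call sites live in the runtime, not in this diff, and the file name is hypothetical.

    // Sketch: create or load tuner data, hand it to the context control, save it afterwards.
    armnn::IClTunedParametersPtr tunedParams = armnn::IClTunedParameters::Create(
        armnn::IClTunedParameters::Mode::UpdateTunedParameters);
    tunedParams->Load("cl-tuned-params.bin");   // hypothetical file; Load throws if it cannot be read

    armnn::ClContextControl contextControl(tunedParams.get());  // loads the OpenCL runtime

    // ... build and run networks; profiling is enabled so the CLTuner can measure kernels ...

    tunedParams->Save("cl-tuned-params.bin");   // persist any newly tuned work-group sizes
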
diff --git a/src/armnn/backends/ClContextControl.hpp b/src/armnn/backends/ClContextControl.hpp
new file mode 100644
index 0000000000..8098e30b75
--- /dev/null
+++ b/src/armnn/backends/ClContextControl.hpp
@@ -0,0 +1,60 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+#pragma once
+
+#include "armnn/IRuntime.hpp"
+
+#ifdef ARMCOMPUTECL_ENABLED
+#include <arm_compute/runtime/CL/CLTuner.h>
+#endif
+
+namespace armnn
+{
+
+class IClTunedParameters;
+class ClTunedParameters;
+
+// ARM Compute OpenCL context control
+class ClContextControl
+{
+public:
+
+ ClContextControl(IClTunedParameters* clTunedParameters = nullptr);
+
+ virtual ~ClContextControl();
+
+ void LoadOpenClRuntime();
+
+ // Users should call this (after freeing all of the cl::Context objects they use)
+ // to release the cached memory used by the compute library.
+ void UnloadOpenClRuntime();
+
+ // Clear the CL cache, without losing the tuned parameter settings
+ void ClearClCache();
+
+private:
+
+ void DoLoadOpenClRuntime(bool useTunedParameters);
+
+ ClTunedParameters* m_clTunedParameters;
+
+};
+
+class ClTunedParameters : public IClTunedParameters
+{
+public:
+ ClTunedParameters(armnn::IClTunedParameters::Mode mode);
+
+ virtual void Load(const char* filename);
+ virtual void Save(const char* filename) const;
+
+ Mode m_Mode;
+
+#ifdef ARMCOMPUTECL_ENABLED
+ arm_compute::CLTuner m_Tuner;
+#endif
+};
+
+} // namespace armnn
diff --git a/src/armnn/backends/ClLayerSupport.cpp b/src/armnn/backends/ClLayerSupport.cpp
index 5f0e4ea622..8905adf1fc 100644
--- a/src/armnn/backends/ClLayerSupport.cpp
+++ b/src/armnn/backends/ClLayerSupport.cpp
@@ -16,6 +16,7 @@
#ifdef ARMCOMPUTECL_ENABLED
#include "ClWorkloads/ClAdditionFloat32Workload.hpp"
+#include "ClWorkloads/ClConvolution2dBaseWorkload.hpp"
#include "ClWorkloads/ClPooling2dBaseWorkload.hpp"
#include "ClWorkloads/ClPermuteWorkload.hpp"
#include "ClWorkloads/ClNormalizationFloat32Workload.hpp"
@@ -110,7 +111,7 @@ bool IsClDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsuppor
{
if (reasonIfUnsupported)
{
- *reasonIfUnsupported = "Depwthwise convolution Weight tensor needs to be 4d";
+ *reasonIfUnsupported = "Depthwise convolution Weight tensor needs to be 4d";
}
return false;
}
@@ -233,16 +234,19 @@ bool IsDirectConvolution2dParamsSupportedCl(std::string* reasonIfUnsupported,
}
bool IsConvolution2dSupportedCl(const TensorInfo& input,
+ const TensorInfo& output,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
+ const TensorInfo& biases,
std::string* reasonIfUnsupported)
{
- return IsSupportedForDataTypeCl(reasonIfUnsupported,
- input.GetDataType(),
- &TrueFunc<decltype(descriptor), decltype(weights)>,
- &IsDirectConvolution2dParamsSupportedCl,
- descriptor,
- weights);
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClConvolution2dWorkloadValidate,
+ reasonIfUnsupported,
+ input,
+ output,
+ descriptor,
+ weights,
+ biases);
}
bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input,
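
FORWARD_WORKLOAD_VALIDATE_FUNC itself is not part of this diff; conceptually it converts the arm_compute::Status returned by ClConvolution2dWorkloadValidate into the bool/reason pair this API exposes, roughly along these lines (an assumed expansion, not the macro's actual definition):

    // Assumed shape of the forwarding logic.
    arm_compute::Status aclStatus =
        ClConvolution2dWorkloadValidate(input, output, descriptor, weights, biases);
    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
    if (!supported && reasonIfUnsupported)
    {
        *reasonIfUnsupported = aclStatus.error_description();
    }
    return supported;
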
diff --git a/src/armnn/backends/ClLayerSupport.hpp b/src/armnn/backends/ClLayerSupport.hpp
index f5b5ae8b15..4f71e907cf 100644
--- a/src/armnn/backends/ClLayerSupport.hpp
+++ b/src/armnn/backends/ClLayerSupport.hpp
@@ -33,8 +33,10 @@ bool IsConstantSupportedCl(const TensorInfo& output,
std::string* reasonIfUnsupported = nullptr);
bool IsConvolution2dSupportedCl(const TensorInfo& input,
+ const TensorInfo& output,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
+ const TensorInfo& biases,
std::string* reasonIfUnsupported = nullptr);
bool IsDepthwiseConvolutionSupportedCl(const TensorInfo& input,
diff --git a/src/armnn/backends/ClWorkloadFactory.cpp b/src/armnn/backends/ClWorkloadFactory.cpp
index 6af657b6b4..916ca46aae 100644
--- a/src/armnn/backends/ClWorkloadFactory.cpp
+++ b/src/armnn/backends/ClWorkloadFactory.cpp
@@ -10,10 +10,10 @@
#include <string>
#include "CpuTensorHandle.hpp"
#include "Layer.hpp"
-#include "Layers.hpp"
#ifdef ARMCOMPUTECL_ENABLED
#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <arm_compute/runtime/CL/CLBufferAllocator.h>
#include <arm_compute/runtime/CL/CLScheduler.h>
#include "backends/MemCopyWorkload.hpp"
#include "backends/ClTensorHandle.hpp"
@@ -24,6 +24,7 @@
#include <boost/polymorphic_cast.hpp>
#include <boost/format.hpp>
+#include <boost/log/trivial.hpp>
namespace armnn
{
@@ -35,93 +36,9 @@ bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType,
#ifdef ARMCOMPUTECL_ENABLED
-ClWorkloadFactory::ClWorkloadFactory(IClTunedParameters* clTunedParameters):
- m_clTunedParameters(boost::polymorphic_downcast<ClTunedParameters*>(clTunedParameters))
+ClWorkloadFactory::ClWorkloadFactory()
+: m_MemoryManager(std::make_unique<arm_compute::CLBufferAllocator>())
{
- try
- {
- std::vector<cl::Platform> platforms;
- cl::Platform::get(&platforms);
-
- // Select default platform as the first element
- cl::Platform::setDefault(platforms[0]);
-
- std::vector<cl::Device> devices;
- platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
-
- // Select default device as the first element
- cl::Device::setDefault(devices[0]);
- }
- catch (const cl::Error& clError)
- {
- throw ClRuntimeUnavailableException(boost::str(boost::format(
- "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%"
- ) % clError.what() % clError.err()));
- }
-
- // Remove the use of global CL context
- cl::Context::setDefault(cl::Context{});
- BOOST_ASSERT(cl::Context::getDefault()() == NULL);
-
- // Remove the use of global CL command queue
- cl::CommandQueue::setDefault(cl::CommandQueue{});
- BOOST_ASSERT(cl::CommandQueue::getDefault()() == NULL);
-}
-
-ClWorkloadFactory::~ClWorkloadFactory()
-{
-}
-
-void ClWorkloadFactory::LoadOpenClRuntime()
-{
- cl::Device device = cl::Device::getDefault();
- cl::Context context;
- cl::CommandQueue commandQueue;
-
- try
- {
- arm_compute::CLKernelLibrary::get().clear_programs_cache();
- arm_compute::CLScheduler::get().init(context, commandQueue, device);
- arm_compute::CLKernelLibrary::get().init(".", context, device);
-
- context = cl::Context(device);
-
- bool enableProfiling = false;
-#if ARMNN_PROFILING_ENABLED
- enableProfiling = true;
-#endif
- if (m_clTunedParameters && m_clTunedParameters->m_Mode == IClTunedParameters::Mode::UpdateTunedParameters)
- {
- enableProfiling = true; // Needed for the CLTuner to work.
- }
-
- if (enableProfiling)
- {
- // Create a new queue with profiling enabled
- commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);
- }
- else
- {
- // Use default queue
- commandQueue = cl::CommandQueue(context, device);
- }
- }
- catch (const cl::Error& clError)
- {
- throw ClRuntimeUnavailableException(boost::str(boost::format(
- "Could not initialize the CL runtime. Error description: %1%. CL error code: %2%"
- ) % clError.what() % clError.err()));
- }
-
- // Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute.
- arm_compute::CLKernelLibrary::get().init(".", context, device);
-
- arm_compute::ICLTuner* tuner = nullptr;
- if (m_clTunedParameters)
- {
- tuner = &m_clTunedParameters->m_Tuner;
- }
- arm_compute::CLScheduler::get().init(context, commandQueue, device, tuner);
}
std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
@@ -170,7 +87,7 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQ
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSoftmaxFloat32Workload, ClSoftmaxUint8Workload>(descriptor, info);
+ return MakeWorkload<ClSoftmaxFloat32Workload, ClSoftmaxUint8Workload>(descriptor, info, m_MemoryManager.Get());
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
@@ -188,7 +105,7 @@ std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateMerger(const MergerQu
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateFullyConnected(
const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
- return MakeWorkload<ClFullyConnectedFloat32Workload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClFullyConnectedFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
}
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
@@ -206,7 +123,8 @@ std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooli
std::unique_ptr<armnn::IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConvolution2dFloat32Workload, ClConvolution2dUint8Workload>(descriptor, info);
+ return MakeWorkload<ClConvolution2dFloat32Workload, ClConvolution2dUint8Workload>(descriptor, info,
+ m_MemoryManager.Get());
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d(
@@ -302,20 +220,15 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescri
return MakeWorkload<ClFloorFloat32Workload, NullWorkload>(descriptor, info);
}
-#else // #if ARMCOMPUTECL_ENABLED
-
-ClWorkloadFactory::ClWorkloadFactory(IClTunedParameters* clTunedParameters)
+void ClWorkloadFactory::Finalize()
{
- // No CL support
+ m_MemoryManager.Finalize();
}
-ClWorkloadFactory::~ClWorkloadFactory()
-{
-}
+#else // #if ARMCOMPUTECL_ENABLED
-void ClWorkloadFactory::LoadOpenClRuntime()
+ClWorkloadFactory::ClWorkloadFactory()
{
- // No CL support
}
std::unique_ptr<ITensorHandle> ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
@@ -462,59 +375,10 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescri
return nullptr;
}
-#endif // #if ARMCOMPUTECL_ENABLED
-
-armnn::IClTunedParameters* IClTunedParameters::CreateRaw(armnn::IClTunedParameters::Mode mode)
-{
- return new ClTunedParameters(mode);
-}
-
-armnn::IClTunedParametersPtr IClTunedParameters::Create(armnn::IClTunedParameters::Mode mode)
-{
- return IClTunedParametersPtr(CreateRaw(mode), &IClTunedParameters::Destroy);
-}
-
-void IClTunedParameters::Destroy(IClTunedParameters* params)
+void ClWorkloadFactory::Finalize()
{
- delete params;
}
-ClTunedParameters::ClTunedParameters(armnn::IClTunedParameters::Mode mode)
- : m_Mode(mode)
-#ifdef ARMCOMPUTECL_ENABLED
- , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters)
-#endif
-{
-}
-
-void ClTunedParameters::Load(const char* filename)
-{
-#ifdef ARMCOMPUTECL_ENABLED
- try
- {
- m_Tuner.load_from_file(filename);
- }
- catch (const std::exception& e)
- {
- throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " +
- e.what());
- }
-#endif
-}
-
-void ClTunedParameters::Save(const char* filename) const
-{
-#ifdef ARMCOMPUTECL_ENABLED
- try
- {
- m_Tuner.save_to_file(filename);
- }
- catch (const std::exception& e)
- {
- throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " +
- e.what());
- }
-#endif
-}
+#endif // #if ARMCOMPUTECL_ENABLED
} // namespace armnn
diff --git a/src/armnn/backends/ClWorkloadFactory.hpp b/src/armnn/backends/ClWorkloadFactory.hpp
index e1e66c050b..7365fe9aeb 100644
--- a/src/armnn/backends/ClWorkloadFactory.hpp
+++ b/src/armnn/backends/ClWorkloadFactory.hpp
@@ -4,42 +4,23 @@
//
#pragma once
-#include "WorkloadFactory.hpp"
+#include "AclBaseMemoryManager.hpp"
#include "OutputHandler.hpp"
#include "armnn/IRuntime.hpp"
-#ifdef ARMCOMPUTECL_ENABLED
-#include <arm_compute/runtime/CL/CLTuner.h>
-#endif
-
-namespace cl
-{
-class Context;
-class CommandQueue;
-class Device;
-}
-
namespace armnn
{
-class IClTunedParameters;
-class ClTunedParameters;
-
// ARM Compute OpenCL workload factory
class ClWorkloadFactory : public IWorkloadFactory
{
public:
-
- ClWorkloadFactory(IClTunedParameters* clTunedParameters = nullptr);
-
- virtual ~ClWorkloadFactory();
+ ClWorkloadFactory();
virtual Compute GetCompute() const override { return Compute::GpuAcc; }
static bool IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported);
- void LoadOpenClRuntime();
-
virtual bool SupportsSubTensors() const override { return true; }
virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
@@ -114,23 +95,11 @@ public:
virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
-private:
- ClTunedParameters* m_clTunedParameters;
-};
+ void Finalize() override;
-class ClTunedParameters : public IClTunedParameters
-{
-public:
- ClTunedParameters(armnn::IClTunedParameters::Mode mode);
-
- virtual void Load(const char* filename);
- virtual void Save(const char* filename) const;
-
- Mode m_Mode;
+private:
-#ifdef ARMCOMPUTECL_ENABLED
- arm_compute::CLTuner m_Tuner;
-#endif
+ mutable AclBaseMemoryManager m_MemoryManager;
};
} // namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp
new file mode 100644
index 0000000000..9851a22dc6
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConvolution2dBaseWorkload.hpp"
+#include "backends/ClLayerSupport.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const TensorInfo& biases)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights);
+ arm_compute::TensorInfo aclBiasesInfo;
+ arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+
+ if (descriptor.m_BiasEnabled)
+ {
+ aclBiasesInfo = BuildArmComputeTensorInfo(biases);
+ optionalAclBiasesInfo = &aclBiasesInfo;
+ }
+
+ arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
+
+ return arm_compute::CLConvolutionLayer::validate(&aclInputInfo,
+ &aclWeightsInfo,
+ optionalAclBiasesInfo,
+ &aclOutputInfo,
+ layerInfo);
+}
+
+}
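
Callers always pass a biases TensorInfo, but as the implementation above shows it is only converted and forwarded to CLConvolutionLayer::validate when descriptor.m_BiasEnabled is set. A caller-side sketch using the new IsConvolution2dSupportedCl signature (variable names are illustrative):

    // Sketch: query support before building the workload.
    std::string reason;
    bool supported = armnn::IsConvolution2dSupportedCl(
        inputInfo, outputInfo, convDesc, weightsInfo, biasesInfo, &reason);
    if (!supported)
    {
        // e.g. fall back to another backend, reporting 'reason'
    }
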
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp
new file mode 100644
index 0000000000..c4ef152361
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const TensorInfo& biases);
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
index 6f4069bcc0..d7aef3d223 100644
--- a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
@@ -14,8 +14,9 @@ namespace armnn
using namespace armcomputetensorutils;
ClConvolution2dFloat32Workload::ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Float32Workload<Convolution2dQueueDescriptor>(descriptor, info)
+ , m_ConvolutionLayer(memoryManager)
{
// todo: check tensor shapes match
@@ -42,14 +43,11 @@ ClConvolution2dFloat32Workload::ClConvolution2dFloat32Workload(const Convolution
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_pConvolutionLayer = std::make_unique<arm_compute::CLConvolutionLayer>();
- static_cast<arm_compute::CLConvolutionLayer*>(m_pConvolutionLayer.get())->configure(&input,
- &m_KernelTensor,
- optionalBias,
- &output,
- padStrideInfo);
-
- BOOST_ASSERT(m_pConvolutionLayer);
+ m_ConvolutionLayer.configure(&input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<float>());
@@ -62,9 +60,8 @@ ClConvolution2dFloat32Workload::ClConvolution2dFloat32Workload(const Convolution
void ClConvolution2dFloat32Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dFloat32Workload_Execute");
- BOOST_ASSERT(m_pConvolutionLayer);
- m_pConvolutionLayer->run();
+ m_ConvolutionLayer.run();
}
-} //namespace armnn
\ No newline at end of file
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
index 29931056a8..4cf73c89cc 100644
--- a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
@@ -7,16 +7,22 @@
#include "backends/ClWorkloadUtils.hpp"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
namespace armnn
{
+
class ClConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor>
{
public:
- ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
void Execute() const override;
private:
- mutable std::unique_ptr<arm_compute::IFunction> m_pConvolutionLayer;
+ mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
arm_compute::CLTensor m_KernelTensor;
arm_compute::CLTensor m_BiasTensor;
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
index a3c6ac9dca..cf419e752e 100644
--- a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
@@ -14,8 +14,9 @@ namespace armnn
using namespace armcomputetensorutils;
ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info)
+ , m_ConvolutionLayer(memoryManager)
{
// todo: check tensor shapes match
@@ -42,16 +43,11 @@ ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQu
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- BOOST_ASSERT_MSG(IsClDirectConvolution2dSupported(weightInfo, m_Data.m_Parameters),
- "Unsupported parameters for u8 convolution");
-
- m_pConvolutionLayer = std::make_unique<arm_compute::CLDirectConvolutionLayer>();
- static_cast<arm_compute::CLDirectConvolutionLayer*>(m_pConvolutionLayer.get())->configure(&input,
- &m_KernelTensor,
- optionalBias,
- &output,
- padStrideInfo);
- BOOST_ASSERT(m_pConvolutionLayer);
+ m_ConvolutionLayer.configure(&input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
@@ -64,9 +60,9 @@ ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQu
void ClConvolution2dUint8Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dUint8Workload_Execute");
- BOOST_ASSERT(m_pConvolutionLayer);
- m_pConvolutionLayer->run();
+ m_ConvolutionLayer.run();
}
} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
index b2849d773b..d4d3908c80 100644
--- a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
@@ -7,6 +7,9 @@
#include "backends/ClWorkloadUtils.hpp"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
namespace armnn
{
@@ -14,11 +17,12 @@ namespace armnn
class ClConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor>
{
public:
- ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
void Execute() const override;
private:
- mutable std::unique_ptr<arm_compute::IFunction> m_pConvolutionLayer;
+ mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
arm_compute::CLTensor m_KernelTensor;
arm_compute::CLTensor m_BiasTensor;
diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
index 96596b9d9c..5dfab9cbbd 100644
--- a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
@@ -13,8 +13,9 @@ namespace armnn
using namespace armcomputetensorutils;
ClFullyConnectedFloat32Workload::ClFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info)
+ , m_FullyConnected(memoryManager)
{
BuildArmComputeTensor(m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
@@ -49,4 +50,4 @@ void ClFullyConnectedFloat32Workload::Execute() const
m_FullyConnected.run();
}
-} //namespace armnn
\ No newline at end of file
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
index def20e0831..c8d1227bda 100644
--- a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
@@ -7,6 +7,9 @@
#include "backends/ClWorkloadUtils.hpp"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
namespace armnn
{
@@ -15,7 +18,8 @@ class ClFullyConnectedFloat32Workload : public armnn::Float32Workload<armnn::Ful
{
public:
ClFullyConnectedFloat32Workload(const armnn::FullyConnectedQueueDescriptor& descriptor,
- const armnn::WorkloadInfo& info);
+ const armnn::WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
using armnn::Float32Workload<armnn::FullyConnectedQueueDescriptor>::m_Data;
void Execute() const override;
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
index 257e76a4df..1d05172b42 100644
--- a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
@@ -10,8 +10,10 @@
namespace armnn
{
-ClSoftmaxFloat32Workload::ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClSoftmaxFloat32Workload::ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Float32Workload<SoftmaxQueueDescriptor>(descriptor, info)
+ , m_SoftmaxLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("ClSoftmaxFloat32Workload", 1, 1);
@@ -26,4 +28,4 @@ void ClSoftmaxFloat32Workload::Execute() const
m_SoftmaxLayer.run();
}
-} //namespace armnn
\ No newline at end of file
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
index a26bbe851d..cf5c45ac6f 100644
--- a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
@@ -7,13 +7,18 @@
#include "backends/ClWorkloadUtils.hpp"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
namespace armnn
{
class ClSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor>
{
public:
- ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
void Execute() const override;
private:
@@ -22,5 +27,3 @@ private:
} //namespace armnn
-
-
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
index 9e856fea94..ee9ab4754b 100644
--- a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
@@ -10,8 +10,10 @@
namespace armnn
{
-ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
+ , m_SoftmaxLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1);
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
index 07ee6256d8..36c2c781aa 100644
--- a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
@@ -7,13 +7,18 @@
#include "backends/ClWorkloadUtils.hpp"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
namespace armnn
{
// Softmax
class ClSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
{
public:
- ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
void Execute() const override;
private:
@@ -23,6 +28,3 @@ private:
} //namespace armnn
-
-
-
diff --git a/src/armnn/backends/MakeWorkloadHelper.hpp b/src/armnn/backends/MakeWorkloadHelper.hpp
index a8729eb07c..a1f9b0b0eb 100644
--- a/src/armnn/backends/MakeWorkloadHelper.hpp
+++ b/src/armnn/backends/MakeWorkloadHelper.hpp
@@ -13,10 +13,12 @@ namespace
template<typename WorkloadType>
struct MakeWorkloadForType
{
- template<typename QueueDescriptorType>
- static std::unique_ptr<WorkloadType> Func(const QueueDescriptorType& descriptor, const WorkloadInfo& info)
+ template<typename QueueDescriptorType, typename... Args>
+ static std::unique_ptr<WorkloadType> Func(const QueueDescriptorType& descriptor,
+ const WorkloadInfo& info,
+ Args&&... args)
{
- return std::make_unique<WorkloadType>(descriptor, info);
+ return std::make_unique<WorkloadType>(descriptor, info, std::forward<Args>(args)...);
}
};
@@ -24,8 +26,10 @@ struct MakeWorkloadForType
template<>
struct MakeWorkloadForType<NullWorkload>
{
- template<typename QueueDescriptorType>
- static std::unique_ptr<NullWorkload> Func(const QueueDescriptorType& descriptor, const WorkloadInfo& info)
+ template<typename QueueDescriptorType, typename... Args>
+ static std::unique_ptr<NullWorkload> Func(const QueueDescriptorType& descriptor,
+ const WorkloadInfo& info,
+ Args&&... args)
{
return nullptr;
}
@@ -33,8 +37,8 @@ struct MakeWorkloadForType<NullWorkload>
// Makes a workload for one of the specified types based on the data type requirements of the TensorInfo.
// Specify type void as the WorkloadType for unsupported DataType/WorkloadType combos.
-template <typename Float32Workload, typename Uint8Workload, typename QueueDescriptorType>
-std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info)
+template <typename Float32Workload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
+std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info, Args&&... args)
{
const DataType dataType = !info.m_InputTensorInfos.empty() ?
info.m_InputTensorInfos[0].GetDataType()
@@ -46,9 +50,9 @@ std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, c
switch (dataType)
{
case DataType::Float32:
- return MakeWorkloadForType<Float32Workload>::Func(descriptor, info);
+ return MakeWorkloadForType<Float32Workload>::Func(descriptor, info, std::forward<Args>(args)...);
case DataType::QuantisedAsymm8:
- return MakeWorkloadForType<Uint8Workload>::Func(descriptor, info);
+ return MakeWorkloadForType<Uint8Workload>::Func(descriptor, info, std::forward<Args>(args)...);
default:
BOOST_ASSERT_MSG(false, "Unknown DataType.");
return nullptr;
@@ -56,4 +60,4 @@ std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor, c
}
} //namespace
-} //namespace armnn
\ No newline at end of file
+} //namespace armnn
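
With the parameter pack in place, MakeWorkload forwards any extra constructor arguments to whichever workload type matches the tensor data type, while the NullWorkload specialization still swallows them and returns nullptr. A sketch of a call site with an extra argument, mirroring the factory changes in this patch:

    // Float32 input -> ClFullyConnectedFloat32Workload(descriptor, info, memoryManager)
    // Uint8 input   -> NullWorkload specialization -> nullptr
    auto workload = MakeWorkload<ClFullyConnectedFloat32Workload, NullWorkload>(
        descriptor, info, m_MemoryManager.Get());
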
diff --git a/src/armnn/backends/NeonLayerSupport.cpp b/src/armnn/backends/NeonLayerSupport.cpp
index d8a3366775..bfc84bd086 100644
--- a/src/armnn/backends/NeonLayerSupport.cpp
+++ b/src/armnn/backends/NeonLayerSupport.cpp
@@ -15,6 +15,7 @@
#include <boost/core/ignore_unused.hpp>
#ifdef ARMCOMPUTENEON_ENABLED
+#include "NeonWorkloads/NeonConvolution2dBaseWorkload.hpp"
#include "NeonWorkloads/NeonPooling2dBaseWorkload.hpp"
#include "NeonWorkloads/NeonPermuteWorkload.hpp"
#endif
@@ -53,9 +54,10 @@ bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convol
const bool strideSupported = (desc.m_StrideX == 1 || desc.m_StrideX == 2 || desc.m_StrideX == 3) &&
(desc.m_StrideY == 1 || desc.m_StrideY == 2 || desc.m_StrideY == 3);
- auto paddingLargerThan = [](const Convolution2dDescriptor& desc, unsigned int value)
+ auto paddingLargerThan = [](const Convolution2dDescriptor& conv2ddesc, unsigned int value)
{
- return desc.m_PadLeft > value || desc.m_PadRight > value || desc.m_PadTop > value || desc.m_PadBottom > value;
+ return conv2ddesc.m_PadLeft > value || conv2ddesc.m_PadRight > value ||
+ conv2ddesc.m_PadTop > value || conv2ddesc.m_PadBottom > value;
};
// Supported sizes and padding
@@ -71,22 +73,6 @@ bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convol
return preferDirectConvolution;
}
-bool IsNeonMultiplicationParamsSupported(std::string* reasonIfUnsupported,
- const TensorInfo& info0,
- const TensorInfo& info1)
-{
- if (info0.GetShape() == info1.GetShape())
- {
- return true;
- }
-
- if (reasonIfUnsupported)
- {
- *reasonIfUnsupported = "Multiplication on Neon does not support implicit broadcast.";
- }
- return false;
-}
-
bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, const NormalizationDescriptor& parameters)
{
if (parameters.m_NormMethodType != NormalizationAlgorithmMethod::LocalBrightness)
@@ -194,16 +180,6 @@ bool IsNeonDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupp
return false;
}
- if (parameters.m_PadLeft != parameters.m_PadRight || parameters.m_PadTop != parameters.m_PadBottom)
- {
- if (reasonIfUnsupported)
- {
- *reasonIfUnsupported = "Asymmetric padding for depthwise convolution currently not supported "
- "in Neon backend";
- }
- return false;
- }
-
return true;
}
@@ -241,15 +217,19 @@ bool IsConstantSupportedNeon(const TensorInfo& output,
}
bool IsConvolution2dSupportedNeon(const TensorInfo& input,
+ const TensorInfo& output,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
+ const TensorInfo& biases,
std::string* reasonIfUnsupported)
{
- ignore_unused(descriptor);
- return IsSupportedForDataTypeNeon(reasonIfUnsupported,
- input.GetDataType(),
- &TrueFunc<>,
- &TrueFunc<>);
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConvolution2dWorkloadValidate,
+ reasonIfUnsupported,
+ input,
+ output,
+ descriptor,
+ weights,
+ biases);
}
bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input,
@@ -309,13 +289,11 @@ bool IsMultiplicationSupportedNeon(const TensorInfo& input0,
const TensorInfo& input1,
std::string* reasonIfUnsupported)
{
+ ignore_unused(input1);
return IsSupportedForDataTypeNeon(reasonIfUnsupported,
input0.GetDataType(),
- &IsNeonMultiplicationParamsSupported,
- &FalseFuncU8<const TensorInfo&, const TensorInfo&>,
- input0,
- input1
- );
+ &TrueFunc<>,
+ &FalseFuncU8<>);
}
bool IsNormalizationSupportedNeon(const TensorInfo& input,
diff --git a/src/armnn/backends/NeonLayerSupport.hpp b/src/armnn/backends/NeonLayerSupport.hpp
index b2ac49ae0d..ce2ecec459 100644
--- a/src/armnn/backends/NeonLayerSupport.hpp
+++ b/src/armnn/backends/NeonLayerSupport.hpp
@@ -39,8 +39,10 @@ bool IsConstantSupportedNeon(const TensorInfo& output,
std::string* reasonIfUnsupported = nullptr);
bool IsConvolution2dSupportedNeon(const TensorInfo& input,
+ const TensorInfo& output,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
+ const TensorInfo& biases,
std::string* reasonIfUnsupported = nullptr);
bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input,
diff --git a/src/armnn/backends/NeonWorkloadFactory.cpp b/src/armnn/backends/NeonWorkloadFactory.cpp
index 0f65a3dcd7..a17988de5a 100644
--- a/src/armnn/backends/NeonWorkloadFactory.cpp
+++ b/src/armnn/backends/NeonWorkloadFactory.cpp
@@ -6,9 +6,9 @@
#include "armnn/Utils.hpp"
#include "CpuTensorHandle.hpp"
#include "Layer.hpp"
-#include "Layers.hpp"
#ifdef ARMCOMPUTENEON_ENABLED
+#include "arm_compute/runtime/Allocator.h"
#include "MemCopyWorkload.hpp"
#include "NeonTensorHandle.hpp"
#include "NeonWorkloadUtils.hpp"
@@ -29,6 +29,11 @@ bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType
#ifdef ARMCOMPUTENEON_ENABLED
+NeonWorkloadFactory::NeonWorkloadFactory()
+: m_MemoryManager(std::make_unique<arm_compute::Allocator>())
+{
+}
+
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
TensorShape const& subTensorShape,
unsigned int const* subTensorOrigin) const
@@ -76,7 +81,8 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const Activatio
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<NeonSoftmaxFloat32Workload, NeonSoftmaxUint8Workload>(descriptor, info);
+ return MakeWorkload<NeonSoftmaxFloat32Workload, NeonSoftmaxUint8Workload>(descriptor, info,
+ m_MemoryManager.Get());
}
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
@@ -94,7 +100,7 @@ std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const Merger
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
- return MakeWorkload<NeonFullyConnectedFloat32Workload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NeonFullyConnectedFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
}
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
@@ -112,7 +118,8 @@ std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Poo
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
- return MakeWorkload<NeonConvolution2dFloat32Workload, NeonConvolution2dUint8Workload>(descriptor, info);
+ return MakeWorkload<NeonConvolution2dFloat32Workload, NeonConvolution2dUint8Workload>(descriptor, info,
+ m_MemoryManager.Get());
}
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
@@ -125,7 +132,7 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
- return MakeWorkload<NeonNormalizationFloat32Workload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NeonNormalizationFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
}
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
@@ -188,7 +195,7 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<NeonL2NormalizationFloat32Workload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NeonL2NormalizationFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
}
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
@@ -209,8 +216,17 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDesc
return MakeWorkload<NeonFloorFloat32Workload, NullWorkload>(descriptor, info);
}
+void NeonWorkloadFactory::Finalize()
+{
+ m_MemoryManager.Finalize();
+}
+
#else // Compiled without ArmCompute libs
+NeonWorkloadFactory::NeonWorkloadFactory()
+{
+}
+
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
TensorShape const& subTensorShape,
unsigned int const* subTensorOrigin) const
@@ -355,6 +371,9 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDesc
return nullptr;
}
+void NeonWorkloadFactory::Finalize()
+{}
+
#endif
} //namespace armnn
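
The factory-level Finalize hook gives the runtime a single point at which to allocate the shared memory pools once the whole graph has been turned into workloads. A rough ordering sketch (the loader/runtime code that drives this is outside this diff):

    // Sketch: intended call order for the memory-managed factories.
    armnn::NeonWorkloadFactory factory;   // constructs the arm_compute::Allocator-backed manager

    // 1. Create every workload for the network via factory.Create*(...); workloads built
    //    with m_MemoryManager.Get() register their intra-layer tensors with the shared
    //    MemoryManagerOnDemand.
    // 2. Then, before executing any workload:
    factory.Finalize();                   // set_allocator + set_num_pools + finalize
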
diff --git a/src/armnn/backends/NeonWorkloadFactory.hpp b/src/armnn/backends/NeonWorkloadFactory.hpp
index 0e39cfe8b1..66a69f3baf 100644
--- a/src/armnn/backends/NeonWorkloadFactory.hpp
+++ b/src/armnn/backends/NeonWorkloadFactory.hpp
@@ -4,7 +4,7 @@
//
#pragma once
-#include "WorkloadFactory.hpp"
+#include "AclBaseMemoryManager.hpp"
#include "OutputHandler.hpp"
#include <boost/core/ignore_unused.hpp>
@@ -16,7 +16,7 @@ namespace armnn
class NeonWorkloadFactory : public IWorkloadFactory
{
public:
- virtual ~NeonWorkloadFactory() { };
+ NeonWorkloadFactory();
virtual Compute GetCompute() const override { return Compute::CpuAcc; }
@@ -95,6 +95,12 @@ public:
virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+
+ void Finalize() override;
+
+private:
+
+ mutable AclBaseMemoryManager m_MemoryManager;
};
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloadUtils.cpp b/src/armnn/backends/NeonWorkloadUtils.cpp
index 0a108a8d38..e807d23d6c 100644
--- a/src/armnn/backends/NeonWorkloadUtils.cpp
+++ b/src/armnn/backends/NeonWorkloadUtils.cpp
@@ -11,8 +11,6 @@
#include "armnn/Utils.hpp"
#include "armnn/Exceptions.hpp"
-#include "Layers.hpp"
-
#include <cstring>
#include <boost/assert.hpp>
#include <boost/cast.hpp>
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
index 10c96d82a6..423f02bcb0 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
@@ -12,9 +12,38 @@
namespace armnn
{
+using namespace armcomputetensorutils;
+
+arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const TensorInfo& biases)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights);
+ arm_compute::TensorInfo aclBiasesInfo;
+ arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+
+ if (descriptor.m_BiasEnabled)
+ {
+ aclBiasesInfo = BuildArmComputeTensorInfo(biases);
+ optionalAclBiasesInfo = &aclBiasesInfo;
+ }
+
+ arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
+
+ return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
+ &aclWeightsInfo,
+ optionalAclBiasesInfo,
+ &aclOutputInfo,
+ layerInfo);
+}
+
template<armnn::DataType dataType>
NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: TypedWorkload<Convolution2dQueueDescriptor, dataType>(descriptor, info)
{
using arm_compute::NEDirectConvolutionLayer;
@@ -50,7 +79,7 @@ NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Con
if (preferDirectConvolution)
{
- auto directConvolutionLayer = std::make_unique<arm_compute::NEDirectConvolutionLayer>();
+ auto directConvolutionLayer = std::make_unique<arm_compute::NEDirectConvolutionLayer>(memoryManager);
directConvolutionLayer->configure(&input,
&m_KernelTensor,
optionalBiasTensor,
@@ -60,7 +89,7 @@ NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Con
}
else
{
- auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>();
+ auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(memoryManager);
convolutionLayer->configure(&input,
&m_KernelTensor,
optionalBiasTensor,
@@ -81,4 +110,3 @@ template class NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8>;
} //namespace armnn
-
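The new NeonConvolution2dWorkloadValidate helper returns an arm_compute::Status, so a layer-support check can be reduced to a thin wrapper around it. A hedged sketch (the function name is hypothetical, and error_code()/error_description() are assumed to be the Status accessors):

    // Sketch, inside namespace armnn next to the validate helper above.
    bool IsConvolution2dSupportedNeonSketch(const TensorInfo& input,
                                            const TensorInfo& output,
                                            const Convolution2dDescriptor& descriptor,
                                            const TensorInfo& weights,
                                            const TensorInfo& biases,
                                            std::string* reasonIfUnsupported)
    {
        const arm_compute::Status aclStatus =
            NeonConvolution2dWorkloadValidate(input, output, descriptor, weights, biases);

        const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
        if (!supported && reasonIfUnsupported != nullptr)
        {
            *reasonIfUnsupported = aclStatus.error_description(); // assumed accessor
        }
        return supported;
    }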
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
index 98d075a5ea..d28d50d819 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
@@ -12,16 +12,27 @@
#include "backends/ArmComputeTensorUtils.hpp"
#include "backends/NeonLayerSupport.hpp"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
namespace armnn
{
+arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const TensorInfo& biases);
+
template<armnn::DataType dataType>
class NeonConvolution2dBaseWorkload : public TypedWorkload<Convolution2dQueueDescriptor, dataType>
{
public:
using TypedWorkload<Convolution2dQueueDescriptor, dataType>::m_Data;
- NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
virtual void ValidateData() const {};
@@ -30,4 +41,5 @@ protected:
arm_compute::Tensor m_KernelTensor;
arm_compute::Tensor m_BiasTensor;
};
-} //namespace armnn \ No newline at end of file
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
index a8c5c63683..f20f2a4ac5 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
@@ -13,8 +13,8 @@ namespace armnn
using namespace armcomputetensorutils;
NeonConvolution2dFloat32Workload::NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info)
- : NeonConvolution2dBaseWorkload(descriptor, info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager)
{
if (m_Data.m_Parameters.m_BiasEnabled)
{
@@ -22,7 +22,6 @@ NeonConvolution2dFloat32Workload::NeonConvolution2dFloat32Workload(const Convolu
}
}
-
void NeonConvolution2dFloat32Workload::Execute() const
{
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConvolution2dFloat32Workload_Execute");
@@ -34,8 +33,5 @@ void NeonConvolution2dFloat32Workload::ValidateData() const
m_Data.ValidateInputsOutputs("NeonConvolution2dFloat32Workload", 1, 1);
}
-
-
} //namespace armnn
-
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp
index f4d95d623f..56b0848efa 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp
@@ -5,21 +5,25 @@
#pragma once
-#include <backends/NeonWorkloadUtils.hpp>
#include "NeonConvolution2dBaseWorkload.hpp"
+#include <backends/NeonWorkloadUtils.hpp>
+
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
namespace armnn
{
+
class NeonConvolution2dFloat32Workload : public NeonConvolution2dBaseWorkload<DataType::Float32>
{
public:
- NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
void Execute() const override;
void ValidateData() const override;
};
-} //namespace armnn
-
-
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
index ae20522361..fb91f7b7b2 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
@@ -5,12 +5,12 @@
#include "NeonConvolution2dUint8Workload.hpp"
-
namespace armnn
{
+
NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info)
- : NeonConvolution2dBaseWorkload(descriptor, info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager)
{
if (m_Data.m_Parameters.m_BiasEnabled)
{
@@ -21,7 +21,7 @@ NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution
void NeonConvolution2dUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, NeonConvolution2dUint8Workload_Execute);
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConvolution2dUint8Workload_Execute");
m_ConvolutionLayer->run();
}
@@ -30,4 +30,4 @@ void NeonConvolution2dUint8Workload::ValidateData() const
m_Data.ValidateInputsOutputs("NeonConvolution2dUint8Workload", 1, 1);
}
-} //namespace armnn \ No newline at end of file
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
index 319d574b1e..5b977210c4 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
@@ -7,13 +7,18 @@
#include "NeonConvolution2dBaseWorkload.hpp"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
namespace armnn
{
class NeonConvolution2dUint8Workload : public NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8>
{
public:
- NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
virtual void ValidateData() const override;
virtual void Execute() const override;
@@ -22,6 +27,3 @@ private:
} //namespace armnn
-
-
-
diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp
index 54c4e4333c..e1c4448642 100644
--- a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp
@@ -7,14 +7,14 @@
#include "backends/CpuTensorHandle.hpp"
#include "backends/ArmComputeTensorUtils.hpp"
-
namespace armnn
{
using namespace armcomputetensorutils;
NeonFullyConnectedFloat32Workload::NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info)
+ , m_FullyConnectedLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("NeonFullyConnectedFloat32Workload", 1, 1);
@@ -51,4 +51,3 @@ void NeonFullyConnectedFloat32Workload::Execute() const
} //namespace armnn
-
diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp
index f9230f1d93..9c722dc573 100644
--- a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp
@@ -7,13 +7,18 @@
#include <backends/NeonWorkloadUtils.hpp>
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
namespace armnn
{
class NeonFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor>
{
public:
- NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info);
+ NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
virtual void Execute() const override;
private:
@@ -24,7 +29,3 @@ private:
} //namespace armnn
-
-
-
-
diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp
index 085f58a219..9f79fa09de 100644
--- a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp
@@ -6,13 +6,13 @@
#include "NeonL2NormalizationFloat32Workload.hpp"
#include "backends/ArmComputeUtils.hpp"
-
namespace armnn
{
NeonL2NormalizationFloat32Workload::NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Float32Workload<L2NormalizationQueueDescriptor>(descriptor, info)
+ , m_Layer(memoryManager)
{
m_Data.ValidateInputsOutputs("NeonL2NormalizationFloat32Workload", 1, 1);
diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp
index 6cab28366a..2b4a1fef37 100644
--- a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp
@@ -7,20 +7,24 @@
#include <backends/NeonWorkloadUtils.hpp>
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
namespace armnn
{
+
class NeonL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor>
{
public:
- NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
virtual void Execute() const override;
private:
// Purposely not a NEL2Normalize function. See constructor.
mutable arm_compute::NENormalizationLayer m_Layer;
};
-} //namespace armnn
-
-
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp
index 739390d5a1..0fd0dcc420 100644
--- a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp
@@ -11,8 +11,9 @@ namespace armnn
{
NeonNormalizationFloat32Workload::NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Float32Workload<NormalizationQueueDescriptor>(descriptor, info)
+ , m_NormalizationLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("NeonNormalizationFloat32Workload", 1, 1);
std::string reasonIfUnsupported;
diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp
index 12a0fa80b2..24b6da8528 100644
--- a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp
@@ -7,13 +7,16 @@
#include <backends/NeonWorkloadUtils.hpp>
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
namespace armnn
{
class NeonNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor>
{
public:
- NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
virtual void Execute() const override;
private:
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp
index 229562ece2..5e2925ca02 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp
@@ -7,9 +7,11 @@
namespace armnn
{
+
NeonSoftmaxFloat32Workload::NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Float32Workload<SoftmaxQueueDescriptor>(descriptor, info)
+ , m_SoftmaxLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("NeonSoftmaxFloat32Workload", 1, 1);
@@ -25,7 +27,6 @@ void NeonSoftmaxFloat32Workload::Execute() const
ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSoftmaxFloat32Workload_Execute");
m_SoftmaxLayer.run();
}
-} //namespace armnn
-
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp
index c466a0f9c6..91d25b47f8 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp
@@ -7,13 +7,18 @@
#include <backends/NeonWorkloadUtils.hpp>
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
namespace armnn
{
class NeonSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor>
{
public:
- NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+ NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
virtual void Execute() const override;
private:
@@ -22,6 +27,3 @@ private:
} //namespace armnn
-
-
-
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
index a66b0343ff..eb4a23c13c 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
@@ -5,12 +5,14 @@
#include "NeonSoftmaxUint8Workload.hpp"
-
-
namespace armnn
{
-NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info)
+
+NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
+ , m_SoftmaxLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1);
@@ -34,5 +36,6 @@ void NeonSoftmaxUint8Workload::Execute() const
m_SoftmaxLayer.run();
}
+
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp
index bccd82a850..19549ef3ef 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp
@@ -7,13 +7,16 @@
#include <backends/NeonWorkloadUtils.hpp>
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
namespace armnn
{
class NeonSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
{
public:
- NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+ NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
virtual void Execute() const override;
private:
@@ -22,6 +25,3 @@ private:
} //namespace armnn
-
-
-
diff --git a/src/armnn/backends/RefLayerSupport.cpp b/src/armnn/backends/RefLayerSupport.cpp
index 964c18e8ea..0b94656ded 100644
--- a/src/armnn/backends/RefLayerSupport.cpp
+++ b/src/armnn/backends/RefLayerSupport.cpp
@@ -77,11 +77,16 @@ bool IsConstantSupportedRef(const TensorInfo& output,
}
bool IsConvolution2dSupportedRef(const TensorInfo& input,
+ const TensorInfo& output,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
+ const TensorInfo& biases,
std::string* reasonIfUnsupported)
{
ignore_unused(descriptor);
+ ignore_unused(output);
+ ignore_unused(weights);
+ ignore_unused(biases);
return IsSupportedForDataTypeRef(reasonIfUnsupported,
input.GetDataType(),
&TrueFunc<>,
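The widened reference check keeps signature parity with the accelerated backends but, as the ignore_unused calls show, still only inspects the input data type. A minimal call-site sketch (shapes are illustrative only):

    #include "backends/RefLayerSupport.hpp"

    void CheckRefConvolutionSupportSketch()
    {
        armnn::TensorInfo input({ 1, 3, 8, 8 }, armnn::DataType::Float32);
        armnn::TensorInfo output({ 1, 2, 6, 6 }, armnn::DataType::Float32);
        armnn::TensorInfo weights({ 2, 3, 3, 3 }, armnn::DataType::Float32);
        armnn::TensorInfo biases({ 2 }, armnn::DataType::Float32);

        armnn::Convolution2dDescriptor descriptor;
        descriptor.m_BiasEnabled = true;

        std::string reason;
        // Only input.GetDataType() affects the result for the reference backend.
        bool supported = armnn::IsConvolution2dSupportedRef(input, output, descriptor,
                                                            weights, biases, &reason);
        (void)supported;
    }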
diff --git a/src/armnn/backends/RefLayerSupport.hpp b/src/armnn/backends/RefLayerSupport.hpp
index 4a329aef34..9db1c14596 100644
--- a/src/armnn/backends/RefLayerSupport.hpp
+++ b/src/armnn/backends/RefLayerSupport.hpp
@@ -28,8 +28,10 @@ bool IsConstantSupportedRef(const TensorInfo& output,
std::string* reasonIfUnsupported = nullptr);
bool IsConvolution2dSupportedRef(const TensorInfo& input,
+ const TensorInfo& output,
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
+ const TensorInfo& biases,
std::string* reasonIfUnsupported = nullptr);
bool IsDepthwiseConvolutionSupportedRef(const TensorInfo& input,
diff --git a/src/armnn/backends/RefWorkloadFactory.cpp b/src/armnn/backends/RefWorkloadFactory.cpp
index 46502d8142..d7d498e89e 100644
--- a/src/armnn/backends/RefWorkloadFactory.cpp
+++ b/src/armnn/backends/RefWorkloadFactory.cpp
@@ -6,7 +6,6 @@
#include "RefWorkloadFactory.hpp"
#include "RefWorkloads.hpp"
#include "Layer.hpp"
-#include "Layers.hpp"
#include "MemCopyWorkload.hpp"
#include "MakeWorkloadHelper.hpp"
@@ -187,7 +186,6 @@ std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMemCopy(const MemCop
#endif
default:
throw InvalidArgumentException("RefWorkloadFactory: Destination type not supported for MemCopy Workload.");
- return nullptr;
}
}
diff --git a/src/armnn/backends/RefWorkloads/ConvImpl.hpp b/src/armnn/backends/RefWorkloads/ConvImpl.hpp
index ecc5b14687..8b66b0b7d2 100644
--- a/src/armnn/backends/RefWorkloads/ConvImpl.hpp
+++ b/src/armnn/backends/RefWorkloads/ConvImpl.hpp
@@ -57,6 +57,11 @@ static void ConvImpl(ConvData data,
int32_t outputOffset,
bool depthwise = false)
{
+ if (data.m_Parameters.m_BiasEnabled && !biasData)
+ {
+ throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
+ }
+
const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
const TensorInfo& outputInfo0 = GetTensorInfo(data.m_Outputs[0]);
const TensorInfo& filterInfo = data.m_Weight->GetTensorInfo();
@@ -65,8 +70,6 @@ static void ConvImpl(ConvData data,
unsigned int channelsInput = filterInfo.GetShape()[1];
unsigned int channelsOutput = depthwise ? channelsInput * depthMult : filterInfo.GetShape()[0];
- BOOST_ASSERT(data.m_Parameters.m_BiasEnabled == false || biasData != nullptr);
-
unsigned int batchSize = outputInfo0.GetShape()[0];
unsigned int heightOutput = outputInfo0.GetShape()[2];
unsigned int widthOutput = outputInfo0.GetShape()[3];
diff --git a/src/armnn/backends/RefWorkloads/Merger.hpp b/src/armnn/backends/RefWorkloads/Merger.hpp
index 476ced76be..7d1bfab557 100644
--- a/src/armnn/backends/RefWorkloads/Merger.hpp
+++ b/src/armnn/backends/RefWorkloads/Merger.hpp
@@ -21,7 +21,7 @@ void Merger(const MergerQueueDescriptor& data)
for (unsigned int index = 0 ; index < outputInfo0.GetNumElements(); ++index)
{
- unsigned int indices[MaxNumOfTensorDimensions];
+ unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
unsigned int indexRemainder = index;
unsigned int dimensionStride = outputInfo0.GetNumElements();
diff --git a/src/armnn/backends/RefWorkloads/Splitter.hpp b/src/armnn/backends/RefWorkloads/Splitter.hpp
index 74c4cb4e18..bd5da6cfe2 100644
--- a/src/armnn/backends/RefWorkloads/Splitter.hpp
+++ b/src/armnn/backends/RefWorkloads/Splitter.hpp
@@ -23,7 +23,7 @@ void Splitter(const SplitterQueueDescriptor& data)
for (unsigned int index = 0; index < inputInfo0.GetNumElements(); ++index)
{
- unsigned int indices[MaxNumOfTensorDimensions];
+ unsigned int indices[MaxNumOfTensorDimensions] = { 0 };
unsigned int indexRemainder = index;
unsigned int dimensionStride = inputInfo0.GetNumElements();
diff --git a/src/armnn/backends/WorkloadFactory.cpp b/src/armnn/backends/WorkloadFactory.cpp
index 32634a6d0f..4e94d7701c 100644
--- a/src/armnn/backends/WorkloadFactory.cpp
+++ b/src/armnn/backends/WorkloadFactory.cpp
@@ -10,7 +10,7 @@
#include "armnn/Types.hpp"
#include "armnn/LayerSupport.hpp"
#include "Layer.hpp"
-#include "Layers.hpp"
+#include "LayersFwd.hpp"
#include "CpuTensorHandle.hpp"
#include <boost/cast.hpp>
@@ -60,8 +60,50 @@ bool IWorkloadFactory::IsLayerSupported(Compute compute, const Layer& layer, Dat
{
auto cLayer = boost::polymorphic_downcast<const Convolution2dLayer*>(&layer);
const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
- result = IsConvolution2dSupported(compute, input, cLayer->GetParameters(),
- cLayer->m_Weight->GetTensorInfo(), reason, reasonCapacity);
+ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+ BOOST_ASSERT(cLayer->m_Weight.get() != nullptr);
+
+ const TensorInfo * biasInfo = nullptr;
+ static const TensorInfo dummyFloat32Bias(TensorShape({1,1,1,1}), DataType::Float32);
+ static const TensorInfo dummyQA8Bias(TensorShape({1,1,1,1}), DataType::Signed32);
+
+ const Convolution2dDescriptor& descriptor = cLayer->GetParameters();
+
+ if (descriptor.m_BiasEnabled)
+ {
+ BOOST_ASSERT(cLayer->m_Bias.get() != nullptr);
+ biasInfo = &(cLayer->m_Bias->GetTensorInfo());
+ }
+ else
+ {
+ // If biases are not enabled, pass a dummy TensorInfo to the validation.
+ switch(input.GetDataType())
+ {
+ case DataType::Float32:
+ {
+ biasInfo = &dummyFloat32Bias;
+ break;
+ }
+ case DataType::QuantisedAsymm8:
+ {
+ biasInfo = &dummyQA8Bias;
+ break;
+ }
+ default:
+ {
+ BOOST_ASSERT_MSG(false, "Unexpected input type");
+ }
+ }
+ }
+
+ result = IsConvolution2dSupported(compute,
+ input,
+ output,
+ descriptor,
+ cLayer->m_Weight->GetTensorInfo(),
+ *biasInfo,
+ reason,
+ reasonCapacity);
break;
}
case LayerType::MemCopy:
@@ -211,4 +253,4 @@ bool IWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, s
return IsLayerSupported(layer.GetComputeDevice(), layer, dataType, outReasonIfUnsupported);
}
-} \ No newline at end of file
+}
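The dummy-bias selection above could equally be factored into a small helper; a hypothetical sketch of the rule it encodes (Float32 inputs validate against a Float32 bias, quantised 8-bit inputs against a Signed32 bias):

    // Hypothetical helper, not part of this patch.
    const armnn::TensorInfo& GetDummyBiasTensorInfoSketch(armnn::DataType inputType)
    {
        static const armnn::TensorInfo dummyFloat32Bias(armnn::TensorShape({1, 1, 1, 1}),
                                                        armnn::DataType::Float32);
        static const armnn::TensorInfo dummyQA8Bias(armnn::TensorShape({1, 1, 1, 1}),
                                                    armnn::DataType::Signed32);
        switch (inputType)
        {
            case armnn::DataType::Float32:         return dummyFloat32Bias;
            case armnn::DataType::QuantisedAsymm8: return dummyQA8Bias;
            default: throw armnn::InvalidArgumentException("Unexpected input type");
        }
    }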
diff --git a/src/armnn/backends/WorkloadFactory.hpp b/src/armnn/backends/WorkloadFactory.hpp
index d3f5bfb40f..5791c1b46f 100644
--- a/src/armnn/backends/WorkloadFactory.hpp
+++ b/src/armnn/backends/WorkloadFactory.hpp
@@ -22,8 +22,11 @@ public:
virtual Compute GetCompute() const = 0;
+ /// Informs the memory manager that the network is finalized and ready for execution.
+ virtual void Finalize() { }
+
static bool IsLayerSupported(Compute compute, const Layer& layer, DataType dataType,
- std::string& outReasonIfUnsupported);
+ std::string& outReasonIfUnsupported);
static bool IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported);
virtual bool SupportsSubTensors() const = 0;
@@ -102,4 +105,4 @@ public:
const WorkloadInfo& info) const = 0;
};
-} //namespace armnn \ No newline at end of file
+} //namespace armnn
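Finalize() defaults to a no-op here; the accelerated factories override it, and the tests below call it exactly once after the workloads have been created and the inputs copied. The expected ordering, as a sketch reusing the handles, descriptor and buffers already set up in the surrounding test code:

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);

    inputHandle->Allocate();
    outputHandle->Allocate();
    CopyDataToITensorHandle(inputHandle.get(), inputData.data());

    workloadFactory.Finalize(); // no-op for the reference factory, pooled allocation for ACL backends
    workload->Execute();

    CopyDataFromITensorHandle(outputData.data(), outputHandle.get());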
diff --git a/src/armnn/backends/test/ArmComputeCl.cpp b/src/armnn/backends/test/ArmComputeCl.cpp
index c45a82db63..ae42d03ee3 100644
--- a/src/armnn/backends/test/ArmComputeCl.cpp
+++ b/src/armnn/backends/test/ArmComputeCl.cpp
@@ -62,6 +62,9 @@ ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvoluti
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true)
ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false)
+
// Splitter
BOOST_AUTO_TEST_CASE(SimpleSplitter)
{
diff --git a/src/armnn/backends/test/ArmComputeNeon.cpp b/src/armnn/backends/test/ArmComputeNeon.cpp
index a81b7cdcd7..0a78b75e2e 100644
--- a/src/armnn/backends/test/ArmComputeNeon.cpp
+++ b/src/armnn/backends/test/ArmComputeNeon.cpp
@@ -88,6 +88,9 @@ ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvoluti
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true)
ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false)
+
namespace
{
@@ -134,6 +137,10 @@ BOOST_AUTO_TEST_CASE(DepthwiseConv2dUtils)
// Supported shape 2x2
armnn::TensorInfo weightsInfo2x2({ 1, 1, 2, 2 }, armnn::DataType::Float32);
BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1), weightsInfo2x2));
+
+ // Asymmetric padding
+ BOOST_TEST(armnn::IsDepthwiseConvolutionSupportedNeon(inputInfo, MakeDepthwiseConv2dDesc(1, 1, 1, 1, 2, 1, 2),
+ weightsInfo3x3));
}
// Pooling
@@ -235,6 +242,8 @@ ARMNN_AUTO_TEST_CASE(AddBroadcast1Element, AdditionBroadcast1ElementTest)
// Mul
ARMNN_AUTO_TEST_CASE(SimpleMultiplication, MultiplicationTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1Element, MultiplicationBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVector, MultiplicationBroadcast1DVectorTest)
// Batch Norm
ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest)
diff --git a/src/armnn/backends/test/Conv2dTestImpl.hpp b/src/armnn/backends/test/Conv2dTestImpl.hpp
index 0c0511b234..0c34beaa33 100644
--- a/src/armnn/backends/test/Conv2dTestImpl.hpp
+++ b/src/armnn/backends/test/Conv2dTestImpl.hpp
@@ -60,8 +60,6 @@ void ApplyBias(std::vector<T>& v, float vScale, int32_t vOffset,
}
}
-
-
template<typename T, typename B>
LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory,
const boost::multi_array<T, 4>& input,
@@ -87,6 +85,8 @@ LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl
unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
+ unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
+ unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
bool biasEnabled = bias.size() > 0;
@@ -102,7 +102,7 @@ LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl
armnn::TensorInfo inputTensorInfo({2*inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType<T>());
armnn::TensorInfo outputTensorInfo({2*outputNum, outputChannels, outputHeight, outputWidth},
armnn::GetDataType<T>());
- armnn::TensorInfo kernelDesc({outputChannels, inputChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>());
+ armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>());
armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());
// Set quantization parameters if the requested type is a quantized type.
@@ -186,6 +186,120 @@ LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workl
CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
+ workloadFactory.Finalize();
+ workload->Execute();
+
+ CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+ return ret;
+}
+
+template<typename T, typename B>
+LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestImpl(armnn::IWorkloadFactory& workloadFactory,
+ const boost::multi_array<T, 4>& input,
+ const boost::multi_array<T, 4>& kernel,
+ const boost::multi_array<B, 1>& bias,
+ const boost::multi_array<T, 4>& outputExpected,
+ float qScale,
+ int32_t qOffset,
+ uint32_t padLeft = 0,
+ uint32_t padTop = 0,
+ uint32_t padRight = 0,
+ uint32_t padBottom = 0,
+ uint32_t strideX = 1,
+ uint32_t strideY = 1)
+{
+ unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
+ unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
+ unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[2]);
+ unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[3]);
+ unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
+ unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
+ unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+ unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
+ unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
+ unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
+ unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
+ unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
+
+ // If a bias is used, its size must equal the number of output channels
+ bool biasEnabled = bias.size() > 0;
+ BOOST_ASSERT(!biasEnabled || bias.size() == outputChannels);
+
+ // Create the tensor descriptors
+ armnn::TensorInfo inputTensorInfo({inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType<T>());
+ armnn::TensorInfo outputTensorInfo({outputNum, outputChannels, outputHeight, outputWidth},
+ armnn::GetDataType<T>());
+ armnn::TensorInfo kernelDesc({kernelChanMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>());
+ armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());
+
+ // Set quantization parameters if the requested type is a quantized type.
+ if (armnn::IsQuantizedType<T>())
+ {
+ inputTensorInfo.SetQuantizationScale(qScale);
+ inputTensorInfo.SetQuantizationOffset(qOffset);
+ outputTensorInfo.SetQuantizationScale(qScale);
+ outputTensorInfo.SetQuantizationOffset(qOffset);
+ kernelDesc.SetQuantizationScale(qScale);
+ kernelDesc.SetQuantizationOffset(qOffset);
+ biasDesc.SetQuantizationScale(qScale*qScale);
+ biasDesc.SetQuantizationOffset(0);
+ }
+
+ // Construct the input data
+ std::vector<T> inputData;
+ inputData.assign(input.data(), input.data() + inputChannels*inputHeight*inputWidth);
+ auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
+
+ // Construct the output data, with bias applied, as appropriate
+ std::vector<T> outputData;
+ outputData.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
+ if (biasEnabled)
+ {
+ std::vector<T> biasV;
+ biasV.assign(bias.data(), bias.data() + outputChannels);
+ ApplyBias(outputData, outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
+ biasV, biasDesc.GetQuantizationScale(), biasDesc.GetQuantizationOffset(),
+ outputWidth, outputHeight);
+ }
+
+ LayerTestResult<T, 4> ret(outputTensorInfo);
+ ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
+
+ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+ AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+
+ armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+ if (biasEnabled)
+ {
+ AllocateAndCopyDataToITensorHandle(&biasTensor, &bias[0]);
+ }
+
+ armnn::DepthwiseConvolution2dQueueDescriptor data;
+ data.m_Weight = &weightsTensor;
+ data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled; leaving it unset can be a source of bugs
+ data.m_Parameters.m_StrideX = strideX;
+ data.m_Parameters.m_StrideY = strideY;
+ data.m_Parameters.m_PadLeft = padLeft;
+ data.m_Parameters.m_PadRight = padRight;
+ data.m_Parameters.m_PadTop = padTop;
+ data.m_Parameters.m_PadBottom = padBottom;
+ data.m_Parameters.m_BiasEnabled = biasEnabled;
+
+ armnn::WorkloadInfo info;
+ AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+ AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
+ inputHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
+
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -306,6 +420,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dDepthMul1TestImpl(armnn::IWorkloadFa
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -478,6 +593,7 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& wo
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -485,8 +601,6 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& wo
return ret;
}
-
-
template<typename T>
LayerTestResult<T,4> Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFactory,
float qScale,
@@ -595,6 +709,7 @@ LayerTestResult<T,4> Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFact
CopyDataToITensorHandle(inputHandle.get(), inputData.data());
+ workloadFactory.Finalize();
workload->Execute();
// output
@@ -692,7 +807,9 @@ LayerTestResult<T,4> CompareConvolution2dTestImpl(armnn::IWorkloadFactory& workl
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
+ refWorkloadFactory.Finalize();
workloadRef->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -792,7 +909,9 @@ LayerTestResult<T, 4> CompareDepthwiseConvolution2dTestImpl(armnn::IWorkloadFact
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
+ refWorkloadFactory.Finalize();
workloadRef->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
diff --git a/src/armnn/backends/test/CreateWorkloadCl.cpp b/src/armnn/backends/test/CreateWorkloadCl.cpp
index 3f320d80e9..f83bb12bbe 100644
--- a/src/armnn/backends/test/CreateWorkloadCl.cpp
+++ b/src/armnn/backends/test/CreateWorkloadCl.cpp
@@ -23,7 +23,6 @@ BOOST_AUTO_TEST_CASE(CreateActivationWorkload)
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload = CreateActivationWorkloadTest<ClActivationFloat32Workload>(factory, graph);
@@ -40,7 +39,6 @@ BOOST_AUTO_TEST_CASE(CreateAdditionWorkload)
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload = CreateAdditionWorkloadTest<ClAdditionFloat32Workload>(factory, graph);
@@ -58,7 +56,6 @@ BOOST_AUTO_TEST_CASE(CreateBatchNormalizationWorkload)
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload = CreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloat32Workload>(factory, graph);
@@ -136,7 +133,6 @@ BOOST_AUTO_TEST_CASE(CreateMultiplicationWorkload)
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload =
CreateMultiplicationWorkloadTest<ClMultiplicationFloat32Workload>(factory, graph);
@@ -155,7 +151,6 @@ BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload)
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload = CreateNormalizationWorkloadTest<ClNormalizationFloat32Workload>(factory, graph);
@@ -172,7 +167,6 @@ BOOST_AUTO_TEST_CASE(CreatePooling2dWorkload)
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload = CreatePooling2dWorkloadTest<ClPooling2dFloat32Workload>(factory, graph);
@@ -190,7 +184,6 @@ static void ClCreateReshapeWorkloadTest()
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload = CreateReshapeWorkloadTest<ReshapeWorkloadType>(factory, graph);
@@ -217,7 +210,6 @@ BOOST_AUTO_TEST_CASE(CreateSoftmaxWorkload)
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload = CreateSoftmaxWorkloadTest<ClSoftmaxFloat32Workload>(factory, graph);
@@ -234,20 +226,24 @@ BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload = CreateSplitterWorkloadTest<ClSplitterFloat32Workload>(factory, graph);
// check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest)
SplitterQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
- BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {7}));
- auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
- BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {4}));
+ BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));
+
auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
- BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {1}));
+ BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));
+
auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
- BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2}));
+ BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));
+
+ auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+ // NOTE: At the moment CL collapses the tensor to two dimensions when dimension zero equals 1.
+ // This difference between the NEON and CL libraries has been raised as an issue with the Compute Library team.
+ BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {7, 7}));
}
BOOST_AUTO_TEST_CASE(CreateSplitterMerger)
@@ -260,7 +256,6 @@ BOOST_AUTO_TEST_CASE(CreateSplitterMerger)
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workloads =
CreateSplitterMergerWorkloadTest<ClSplitterFloat32Workload, ClMergerFloat32Workload>(factory, graph);
@@ -332,7 +327,6 @@ BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl)
{
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
CreateMemCopyWorkloads<CopyFromCpuToClWorkload,CopyFromClToCpuWorkload,IClTensorHandle>(factory);
}
@@ -340,7 +334,6 @@ BOOST_AUTO_TEST_CASE(CreateL2NormalizationWorkload)
{
Graph graph;
ClWorkloadFactory factory;
- factory.LoadOpenClRuntime();
auto workload = CreateL2NormalizationWorkloadTest<ClL2NormalizationFloat32Workload>(factory, graph);
diff --git a/src/armnn/backends/test/CreateWorkloadNeon.cpp b/src/armnn/backends/test/CreateWorkloadNeon.cpp
index 807937ba2b..4d91fbfd31 100644
--- a/src/armnn/backends/test/CreateWorkloadNeon.cpp
+++ b/src/armnn/backends/test/CreateWorkloadNeon.cpp
@@ -214,13 +214,16 @@ BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
// check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest)
SplitterQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
- BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 7}, DataType::Float32)));
+ BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
+
auto outputHandle0 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
- BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 4}, DataType::Float32)));
+ BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
+
auto outputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[1]);
- BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({1, 1}, DataType::Float32)));
+ BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
+
auto outputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[2]);
- BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({1, 2}, DataType::Float32)));
+ BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
}
BOOST_AUTO_TEST_CASE(CreateSplitterMerger)
diff --git a/src/armnn/backends/test/CreateWorkloadRef.cpp b/src/armnn/backends/test/CreateWorkloadRef.cpp
index e0eacebe1a..abc46e4361 100644
--- a/src/armnn/backends/test/CreateWorkloadRef.cpp
+++ b/src/armnn/backends/test/CreateWorkloadRef.cpp
@@ -241,13 +241,16 @@ static void RefCreateSplitterWorkloadTest()
// check that outputs are as we expect them (see definition of CreateSplitterWorkloadTest)
SplitterQueueDescriptor queueDescriptor = workload->GetData();
auto inputHandle = boost::polymorphic_downcast<ConstCpuTensorHandle*>(queueDescriptor.m_Inputs[0]);
- BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 1, 7 }, SplitterWorkloadType::ms_DataType)));
+ BOOST_TEST((inputHandle->GetTensorInfo() == TensorInfo({ 5, 7, 7 }, SplitterWorkloadType::ms_DataType)));
+
auto outputHandle0 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[0]);
- BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 4 }, SplitterWorkloadType::ms_DataType)));
+ BOOST_TEST((outputHandle0->GetTensorInfo() == TensorInfo({ 1, 7, 7 }, SplitterWorkloadType::ms_DataType)));
+
auto outputHandle1 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[1]);
- BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 1, 1 }, SplitterWorkloadType::ms_DataType)));
+ BOOST_TEST((outputHandle1->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, SplitterWorkloadType::ms_DataType)));
+
auto outputHandle2 = boost::polymorphic_downcast<CpuTensorHandle*>(queueDescriptor.m_Outputs[2]);
- BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 1, 2 }, SplitterWorkloadType::ms_DataType)));
+ BOOST_TEST((outputHandle2->GetTensorInfo() == TensorInfo({ 2, 7, 7 }, SplitterWorkloadType::ms_DataType)));
}
BOOST_AUTO_TEST_CASE(CreateSplitterFloat32Workload)
diff --git a/src/armnn/backends/test/FullyConnectedTestImpl.hpp b/src/armnn/backends/test/FullyConnectedTestImpl.hpp
index 479da3fabc..d2379ec10e 100644
--- a/src/armnn/backends/test/FullyConnectedTestImpl.hpp
+++ b/src/armnn/backends/test/FullyConnectedTestImpl.hpp
@@ -10,9 +10,9 @@ LayerTestResult<T, 2> SimpleFullyConnectedTestImpl(
armnn::TensorInfo outputTensorInfo,
armnn::TensorInfo weightsDesc,
armnn::TensorInfo biasesDesc,
- boost::multi_array<T, 2> weights,
- boost::multi_array<B, 1> bias,
- boost::multi_array<T, 4> input,
+ boost::multi_array<T, 2>& weights,
+ boost::multi_array<B, 1>& bias,
+ boost::multi_array<T, 4>& input,
bool biasEnabled,
bool transposeWeights)
{
@@ -41,6 +41,7 @@ LayerTestResult<T, 2> SimpleFullyConnectedTestImpl(
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
diff --git a/src/armnn/backends/test/IsLayerSupportedTest.cpp b/src/armnn/backends/test/IsLayerSupportedTest.cpp
index 4b4c9f6099..af7ba923ec 100644
--- a/src/armnn/backends/test/IsLayerSupportedTest.cpp
+++ b/src/armnn/backends/test/IsLayerSupportedTest.cpp
@@ -9,7 +9,6 @@
#include "backends/CpuTensorHandle.hpp"
#include "backends/RefWorkloadFactory.hpp"
-#include <Layers.hpp>
#include <string>
#include <iostream>
@@ -67,4 +66,4 @@ BOOST_AUTO_TEST_CASE(IsLayerSupportedUint8Cl)
}
#endif //#ifdef ARMCOMPUTECL_ENABLED
-BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/armnn/backends/test/LayerTests.cpp b/src/armnn/backends/test/LayerTests.cpp
index 9eed2dbf78..a10e4bd7a0 100644
--- a/src/armnn/backends/test/LayerTests.cpp
+++ b/src/armnn/backends/test/LayerTests.cpp
@@ -6,8 +6,10 @@
#include "test/TensorHelpers.hpp"
#include "TensorCopyUtils.hpp"
+#include "Permute.hpp"
#include <boost/test/unit_test.hpp>
+#include <boost/assert.hpp>
#include "armnn/LayerSupport.hpp"
@@ -342,11 +344,11 @@ LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor
std::vector<T> myVec(outputDesc.GetNumElements(), 0);
boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
QuantizedVector<T>(qScale, qOffset, {
- -4723, -7044, -9324, -6253, -3542,
-7140, -10580, -13940, -9300, -5230,
-9590, -14120, -18520, -12290, -6860,
-9980, -14560, -18960, -12560, -7000,
-7518, -10904, -14144, -9318, -5152,
+ -5032, -7256, -9376, -6142, -3368,
})));
return SimpleConvolution2dTestImpl<T>(workloadFactory,
@@ -357,9 +359,79 @@ LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWor
qScale,
qOffset,
1, // padding left
- 2, // padding top
+ 1, // padding top
2, // padding right
- 1); // padding bottom
+ 2); // padding bottom
+}
+
+template<typename T>
+LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloadFactory& workloadFactory,
+ float qScale,
+ int32_t qOffset,
+ bool biasEnabled)
+{
+ // Use a single-batch 2-channel 5x5 image as input
+ armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>());
+ auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
+ QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
+ 0, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24,
+
+ 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49
+ })));
+
+ // Use a depth multiplier of 1 on a 2-channel 4x4 kernel
+ armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, armnn::GetDataType<T>());
+ auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
+ QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), {
+ 32, 31, 30, 29,
+ 28, 27, 26, 25,
+ 24, 23, 22, 21,
+ 20, 19, 18, 17,
+
+ 16, 15, 14, 13,
+ 12, 11, 10, 9,
+ 8, 7, 6, 5,
+ 4, 3, 2, 1
+ })));
+
+ // Expected output is 1 batch of a 2-channel 5x5 image
+ // calculated using the Python TensorFlow library with strideX=1 and strideY=1
+ armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, armnn::GetDataType<T>());
+ boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
+ QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {
+ 1062, 1580, 1850, 1530, 1117,
+ 2140, 3108, 3500, 2842, 2042,
+ 3580, 5068, 5460, 4342, 3062,
+ 3618, 5072, 5390, 4248, 2971,
+ 3074, 4282, 4510, 3533, 2457,
+ 1550, 2284, 2362, 1955, 1428,
+ 2910, 4206, 4342, 3528, 2536,
+ 3390, 4886, 5022, 4068, 2916,
+ 3566, 5056, 5182, 4133, 2922,
+ 3100, 4352, 4452, 3517, 2465
+ })));
+
+ return DepthwiseConvolution2dAsymmetricTestImpl<T>(workloadFactory,
+ input,
+ kernel,
+ GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
+ expectedOutput,
+ qScale,
+ qOffset,
+ 1, // padding left
+ 1, // padding top
+ 2, // padding right
+ 2, // padding bottom
+ 1, // strideX
+ 1); // strideY
}
LayerTestResult<float, 4>
@@ -385,6 +457,12 @@ LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFa
return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled);
}
+LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory,
+ bool biasEnabled)
+{
+ return DepthwiseConvolution2dAsymmetricTestCommon<float>(workloadFactory, 0.0f, 0, biasEnabled);
+}
+
LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
bool biasEnabled)
{
@@ -493,138 +571,85 @@ LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& wo
LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory)
{
- unsigned int outputWidth = 5;
+ unsigned int outputWidth = 3;
unsigned int outputHeight = 6;
unsigned int outputChannels = 3;
- unsigned int inputWidth1 = 2;
- unsigned int inputHeight1 = 2;
- unsigned int inputChannels1 = 3;
-
- unsigned int inputWidth2 = 2;
- unsigned int inputHeight2 = 4;
- unsigned int inputChannels2 = 3;
-
- unsigned int inputWidth3 = 3;
- unsigned int inputHeight3 = 6;
- unsigned int inputChannels3 = 2;
+ unsigned int inputWidth1 = 3;
+ unsigned int inputHeight1 = 6;
+ unsigned int inputChannels1 = 2;
- unsigned int inputWidth4 = 3;
- unsigned int inputHeight4 = 6;
- unsigned int inputChannels4 = 1;
+ unsigned int inputWidth2 = 3;
+ unsigned int inputHeight2 = 6;
+ unsigned int inputChannels2 = 1;
// Define the tensor descriptors
armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32);
armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32);
armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32);
- armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::Float32);
- armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::Float32);
LayerTestResult<float,3> ret(outputTensorInfo);
-
ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(
- {
- 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
- 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
- 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
- 16.0f, 17.0f, 18.0f, 19.0f, 20.0f,
- 21.0f, 22.0f, 23.0f, 24.0f, 25.0f,
- 26.0f, 27.0f, 28.0f, 29.0f, 30.0f,
-
- 31.0f, 32.0f, 33.0f, 34.0f, 35.0f,
- 36.0f, 37.0f, 38.0f, 39.0f, 40.0f,
- 41.0f, 42.0f, 43.0f, 44.0f, 45.0f,
- 46.0f, 47.0f, 48.0f, 49.0f, 50.0f,
- 51.0f, 52.0f, 53.0f, 54.0f, 55.0f,
- 56.0f, 57.0f, 58.0f, 59.0f, 60.0f,
-
- 61.0f, 62.0f, 63.0f, 64.0f, 65.0f,
- 66.0f, 67.0f, 68.0f, 69.0f, 70.0f,
- 71.0f, 72.0f, 73.0f, 74.0f, 75.0f,
- 76.0f, 77.0f, 78.0f, 79.0f, 80.0f,
- 81.0f, 82.0f, 83.0f, 84.0f, 85.0f,
- 86.0f, 87.0f, 88.0f, 89.0f, 90.0f,
+ {
+ 1.0f, 2.0f, 3.0f,
+ 4.0f, 5.0f, 6.0f,
+ 7.0f, 8.0f, 9.0f,
+ 10.0f, 11.0f, 12.0f,
+ 13.0f, 14.0f, 15.0f,
+ 16.0f, 17.0f, 18.0f,
+
+ 19.0f, 20.0f, 21.0f,
+ 22.0f, 23.0f, 24.0f,
+ 25.0f, 26.0f, 27.0f,
+ 28.0f, 29.0f, 30.0f,
+ 31.0f, 32.0f, 33.0f,
+ 34.0f, 35.0f, 36.0f,
+ 37.0f, 38.0f, 39.0f,
+ 40.0f, 41.0f, 42.0f,
+ 43.0f, 44.0f, 45.0f,
+ 46.0f, 47.0f, 48.0f,
+ 49.0f, 50.0f, 51.0f,
+ 52.0f, 53.0f, 54.0f,
})
);
-
auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>(
{
- 1.0f, 2.0f,
- 6.0f, 7.0f,
-
- 31.0f, 32.0f,
- 36.0f, 37.0f,
+ 1.0f, 2.0f, 3.0f,
+ 4.0f, 5.0f, 6.0f,
+ 7.0f, 8.0f, 9.0f,
+ 10.0f, 11.0f, 12.0f,
+ 13.0f, 14.0f, 15.0f,
+ 16.0f, 17.0f, 18.0f,
- 61.0f, 62.0f,
- 66.0f, 67.0f,
+ 19.0f, 20.0f, 21.0f,
+ 22.0f, 23.0f, 24.0f,
+ 25.0f, 26.0f, 27.0f,
+ 28.0f, 29.0f, 30.0f,
+ 31.0f, 32.0f, 33.0f,
+ 34.0f, 35.0f, 36.0f,
})
);
auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>(
{
- 11.0f, 12.0f,
- 16.0f, 17.0f,
- 21.0f, 22.0f,
- 26.0f, 27.0f,
-
- 41.0f, 42.0f,
- 46.0f, 47.0f,
- 51.0f, 52.0f,
- 56.0f, 57.0f,
-
- 71.0f, 72.0f,
- 76.0f, 77.0f,
- 81.0f, 82.0f,
- 86.0f, 87.0f,
- })
- );
-
- auto input3 = MakeTensor<float, 3>(inputTensorInfo3, std::vector<float>(
- {
- 3.0f, 4.0f, 5.0f,
- 8.0f, 9.0f, 10.0f,
- 13.0f, 14.0f, 15.0f,
- 18.0f, 19.0f, 20.0f,
- 23.0f, 24.0f, 25.0f,
- 28.0f, 29.0f, 30.0f,
-
- 33.0f, 34.0f, 35.0f,
- 38.0f, 39.0f, 40.0f,
+ 37.0f, 38.0f, 39.0f,
+ 40.0f, 41.0f, 42.0f,
43.0f, 44.0f, 45.0f,
- 48.0f, 49.0f, 50.0f,
- 53.0f, 54.0f, 55.0f,
- 58.0f, 59.0f, 60.0f,
- })
- );
-
-
- auto input4 = MakeTensor<float, 3>(inputTensorInfo4, std::vector<float>(
- {
- 63.0f, 64.0f, 65.0f,
- 68.0f, 69.0f, 70.0f,
- 73.0f, 74.0f, 75.0f,
- 78.0f, 79.0f, 80.0f,
- 83.0f, 84.0f, 85.0f,
- 88.0f, 89.0f, 90.0f,
+ 46.0f, 47.0f, 48.0f,
+ 49.0f, 50.0f, 51.0f,
+ 52.0f, 53.0f, 54.0f,
})
);
std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of input[0]
armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1);
- std::vector<unsigned int> wOrigin2 = {0, 2, 0}; //extent of the window is defined by size of input[1]
+ std::vector<unsigned int> wOrigin2 = {2, 0, 0}; //extent of the window is defined by size of input[1]
armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2);
- std::vector<unsigned int> wOrigin3 = {0, 0, 2}; //extent of the window is defined by size of input[2]
- armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3);
-
- std::vector<unsigned int> wOrigin4 = {2, 0, 2}; //extent of the window is defined by size of input[3]
- armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4);
-
-
std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
bool subTensorsSupported = workloadFactory.SupportsSubTensors();
@@ -639,43 +664,25 @@ LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory)
workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
workloadFactory.CreateTensorHandle(inputTensorInfo2);
- std::unique_ptr<armnn::ITensorHandle> inputHandle3 =
- subTensorsSupported ?
- workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) :
- workloadFactory.CreateTensorHandle(inputTensorInfo3);
-
- std::unique_ptr<armnn::ITensorHandle> inputHandle4 =
- subTensorsSupported ?
- workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) :
- workloadFactory.CreateTensorHandle(inputTensorInfo4);
-
-
armnn::MergerQueueDescriptor data;
armnn::WorkloadInfo info;
AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
- AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get());
- AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get());
AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
data.m_ViewOrigins.push_back(window1);
data.m_ViewOrigins.push_back(window2);
- data.m_ViewOrigins.push_back(window3);
- data.m_ViewOrigins.push_back(window4);
std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info);
inputHandle1->Allocate();
inputHandle2->Allocate();
- inputHandle3->Allocate();
- inputHandle4->Allocate();
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
- CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]);
- CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
@@ -765,6 +772,7 @@ LayerTestResult<float,4> AdditionTest(armnn::IWorkloadFactory& workloadFactory)
CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -841,6 +849,7 @@ LayerTestResult<T, 4> AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloa
CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -912,6 +921,7 @@ LayerTestResult<T, 4> AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory&
CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -996,7 +1006,9 @@ LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFa
CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);
CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
+ refWorkloadFactory.Finalize();
workloadRef->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -1043,6 +1055,7 @@ LayerTestResult<float,4> MultiplicationTestHelper(armnn::IWorkloadFactory& workl
CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -1185,7 +1198,9 @@ LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& work
CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]);
CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
+ refWorkloadFactory.Finalize();
workloadRef->Execute();
CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get());
@@ -1264,7 +1279,9 @@ LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadF
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
+ refWorkloadFactory.Finalize();
workloadRef->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -1273,23 +1290,299 @@ LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadF
return ret;
}
-void Concatenate(armnn::IWorkloadFactory& workloadFactory,
- std::initializer_list<const armnn::TensorInfo> inputTensorInfos,
- std::initializer_list<void*> inputs,
- const armnn::TensorInfo& outputTensorInfo,
- void* output,
- unsigned int concatDim)
-{
- armnn::MergerQueueDescriptor queueDescriptor;
+template<typename T>
+void PermuteTensorData(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::PermutationVector& mappings,
+ armnn::TensorInfo & inputTensorInfo,
+ const T * inputData,
+ std::vector<T>& outputData)
+{
+ BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null");
+ if (inputData == nullptr)
+ {
+ // A null pointer is an error in the test. By returning without doing the permutation
+ // we expect the caller to fail the test. It still makes sense to report this via an
+ // assert in Debug builds.
+ return;
+ }
+
+ armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings);
+
+ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ armnn::PermuteQueueDescriptor queueDescriptor;
+ queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings};
+ armnn::WorkloadInfo workloadInfo;
+ AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
+ AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
+
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo);
+
+ inputHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), inputData);
+
+ workload->Execute();
+
+ outputData.resize(outputTensorInfo.GetNumElements());
+ CopyDataFromITensorHandle(&outputData[0], outputHandle.get());
+ inputTensorInfo = outputTensorInfo;
+}
+armnn::OriginsDescriptor CreateMergerDescriptorForConcatenation(
+ const std::vector<armnn::TensorInfo> & inputTensorInfos,
+ unsigned int concatDim)
+{
std::vector<armnn::TensorShape> shapes;
shapes.reserve(inputTensorInfos.size());
for (const armnn::TensorInfo& it: inputTensorInfos)
{
shapes.push_back(it.GetShape());
}
- armnn::OriginsDescriptor viewsDescriptor = armnn::CreateMergerDescriptorForConcatenation(shapes.begin(),
- shapes.end(), concatDim);
+
+ return armnn::CreateMergerDescriptorForConcatenation(shapes.begin(),
+ shapes.end(),
+ concatDim);
+}
+
+//
+// Concatenation is only supported along the N and C dimensions for NCHW. For tensors with
+// fewer than 4 dimensions we need to make sure that the concat dimension is at least the
+// 3rd slowest iterating one.
+//
+
+bool NeedPermuteForConcat(
+ const std::vector<armnn::TensorInfo> & inputTensorInfos,
+ unsigned int concatDim)
+{
+ // See note above. Additionally we expect the input shapes to have the
+ // same number of dimensions.
+ unsigned int nDimensions = 0;
+
+ // determine the number of dimensions, and sanity check them
+ // against test implementation issues
+ for (auto && tensorInfo : inputTensorInfos)
+ {
+ if (!nDimensions)
+ {
+ nDimensions = tensorInfo.GetShape().GetNumDimensions();
+ }
+ else
+ {
+ BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(),
+ "Input shapes must have the same number of dimensions");
+ }
+ }
+
+ return (nDimensions-concatDim) < 3;
+}
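+
+// A worked example of the rule above (illustrative only, not part of the test):
+//   std::vector<armnn::TensorInfo> nchw(2, armnn::TensorInfo({1, 2, 4, 4}, armnn::DataType::Float32));
+//   NeedPermuteForConcat(nchw, 1);   // 4 - 1 = 3 -> false: concat along C is supported directly
+//   NeedPermuteForConcat(nchw, 3);   // 4 - 3 = 1 -> true:  concat along W needs the permute path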
+
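+// Pads a shape with leading 1s so that it always has three dimensions,
+// e.g. {6} becomes {1, 1, 6} and {4, 5} becomes {1, 4, 5}.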
+armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape & inputShape)
+{
+ unsigned int numDims = inputShape.GetNumDimensions();
+ if (numDims >= 3)
+ {
+ // Nothing to do if the inputShape has at least 3 dimensions.
+ return inputShape;
+ }
+
+ std::vector<unsigned int> newDims(size_t(3), 1u);
+ unsigned int expandedBy = 3 - numDims;
+ for (unsigned int i=0; i<numDims; ++i)
+ {
+ newDims[expandedBy+i] = inputShape[i];
+ }
+ return armnn::TensorShape(3u, &newDims[0]);
+}
+
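+// Chooses a forward/reverse permutation pair that rotates the requested concatenation axis into
+// position 0 and back. For example (illustrative only): for a {2, 3, 4} tensor concatenated along
+// W (axis 2), the forward permutation {1, 2, 0} gives {4, 2, 3} so the merge happens along axis 0,
+// and the reverse permutation {2, 0, 1} restores the original {2, 3, 4} layout afterwards.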
+void Generate3dPermuteVectorForConcat(
+ unsigned int numDimensions,
+ unsigned int & concatDim,
+ std::pair<armnn::PermutationVector, armnn::PermutationVector> & permutations)
+{
+ BOOST_ASSERT_MSG(numDimensions <= 3,
+ "Only dimensions 1,2 and 3 are supported by this helper");
+
+ unsigned int expandedBy = 3 - numDimensions;
+ unsigned int expandedConcatAxis = concatDim + expandedBy;
+
+ if (expandedConcatAxis == 2)
+ {
+ concatDim = 0;
+ armnn::PermutationVector forwardPermutation({1, 2, 0});
+ armnn::PermutationVector reversePermutation({2, 0, 1});
+ permutations = std::make_pair(forwardPermutation, reversePermutation);
+ }
+ else if (expandedConcatAxis == 1)
+ {
+ concatDim = 0;
+ armnn::PermutationVector forwardPermutation({2, 0, 1});
+ armnn::PermutationVector reversePermutation({1, 2, 0});
+ permutations = std::make_pair(forwardPermutation, reversePermutation);
+ }
+ else
+ {
+ BOOST_ASSERT(expandedConcatAxis == 0);
+ concatDim = 0;
+ }
+}
+
+//
+// Permutes the input tensors so that a supported concatenation can be performed.
+// Tensors with fewer than 3 dimensions are treated as 3d by adding dummy dimensions
+// of size 1 at the front. Finally, this function reports what the output shape of the
+// permuted, concatenated tensor is going to be.
+//
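+// For example (illustrative only): two {2, 3} inputs concatenated along dimension 1 are
+// expanded to {1, 2, 3} and permuted to {3, 1, 2}, so the merger sees a concatenation along
+// dimension 0; outputTensorInfo is updated to the correspondingly permuted output shape.
+//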
+template <typename T>
+void PermuteInputsForConcat(
+ armnn::IWorkloadFactory& workloadFactory,
+ std::vector<armnn::TensorInfo> & inputTensorInfos,
+ std::vector<T *> & inputData,
+ std::vector<std::vector<T>> & inputDataStorage,
+ armnn::PermutationVector & permuteVector,
+ unsigned int & concatDim,
+ armnn::TensorInfo & outputTensorInfo)
+{
+ BOOST_ASSERT_MSG(inputTensorInfos.size() > 1,
+ "Expecting more than one tensor to be concatenated here");
+
+ unsigned int numDims = 0;
+ unsigned int nthInput = 0;
+ const armnn::PermutationVector identity({0, 1, 2});
+
+ std::pair<armnn::PermutationVector, armnn::PermutationVector> permutations =
+ std::make_pair(identity, identity);
+
+ inputDataStorage.resize(inputData.size());
+
+ for (auto && tensorInfo : inputTensorInfos)
+ {
+ if (numDims == 0)
+ {
+ numDims = tensorInfo.GetShape().GetNumDimensions();
+ Generate3dPermuteVectorForConcat(numDims, concatDim, permutations);
+ // store the reverse permutation
+ permuteVector = permutations.second;
+ BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity),
+ "Test logic error, we don't need permutation, so we shouldn't arrive here");
+ }
+ else
+ {
+ BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(),
+ "All inputs must have the same number of dimensions");
+ }
+
+ armnn::TensorInfo newTensorInfo = tensorInfo;
+ newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape()));
+
+ PermuteTensorData<T>(workloadFactory,
+ permutations.first,
+ newTensorInfo,
+ inputData[nthInput],
+ inputDataStorage[nthInput]);
+
+ inputData[nthInput] = inputDataStorage[nthInput].data();
+ inputTensorInfos[nthInput] = newTensorInfo;
+
+ ++nthInput;
+ }
+
+ outputTensorInfo.SetShape(
+ armnnUtils::Permuted(
+ ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()),
+ permutations.first));
+}
+
+
+//
+// This is the counterpart of PermuteInputsForConcat(...): it permutes the output of the
+// concatenation back so that we can check it against the expected output.
+//
+template <typename T>
+void PermuteOutputForConcat(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::TensorInfo & tensorInfo,
+ const armnn::PermutationVector & permuteVector,
+ std::unique_ptr<armnn::ITensorHandle> && inputDataHandle,
+ T * data)
+{
+ BOOST_ASSERT_MSG(data != nullptr, "data must not be null");
+ if (data == nullptr)
+ {
+ // A null pointer is an error in the test. By returning without doing the permutation
+ // we expect the caller to fail the test. It still makes sense to report this via an
+ // assert in Debug builds.
+ return;
+ }
+
+ armnn::TensorInfo resultTensorInfo = tensorInfo;
+ std::vector<T> inputData(tensorInfo.GetNumElements());
+ std::vector<T> outputData;
+
+ CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get());
+
+ PermuteTensorData<T>(workloadFactory,
+ permuteVector,
+ resultTensorInfo,
+ &inputData[0],
+ outputData);
+
+ ::memcpy(data, &outputData[0], sizeof(T)*outputData.size());
+}
+
+template <typename T>
+void Concatenate(armnn::IWorkloadFactory& workloadFactory,
+ std::initializer_list<const armnn::TensorInfo> inputTensorInfosOrig,
+ std::initializer_list<T *> inputsOrig,
+ const armnn::TensorInfo& outputTensorInfoOrig,
+ T * output,
+ unsigned int concatDim)
+{
+ BOOST_ASSERT_MSG(output != nullptr, "output must not be null");
+ if (output == nullptr)
+ {
+ // A null pointer is an error in the test. By returning without doing the concatenation
+ // we expect the caller to fail the test. It still makes sense to report this via an
+ // assert in Debug builds.
+ return;
+ }
+
+ armnn::MergerQueueDescriptor queueDescriptor;
+
+ // save a copy of the parameters which we might need to change
+ std::vector<armnn::TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
+ std::vector<T *> inputs = inputsOrig;
+ armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig;
+
+ armnn::PermutationVector permuteVector{0, 1, 2};
+
+ // hold and automatically release memory for the reshaped input data
+ std::vector<std::vector<T>> tmpInputDataStorage;
+
+ const size_t inputCount = inputTensorInfos.size();
+
+ bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim);
+
+ if (needPermuteForConcat)
+ {
+ //
+ // We need to permute the inputs, because concatenation along
+ // the requested axis is not supported
+ //
+ PermuteInputsForConcat<T>(workloadFactory,
+ inputTensorInfos,
+ inputs,
+ tmpInputDataStorage,
+ permuteVector,
+ concatDim,
+ outputTensorInfo);
+ }
+
+ armnn::OriginsDescriptor viewsDescriptor = CreateMergerDescriptorForConcatenation(inputTensorInfos, concatDim);
queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
@@ -1298,8 +1591,6 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory,
viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
}
- const size_t inputCount = inputTensorInfos.size();
-
std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
@@ -1308,7 +1599,7 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory,
const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
for (unsigned int i = 0; i < inputCount; ++i)
{
- const armnn::TensorInfo& inputTensorInfo = inputTensorInfos.begin()[i];
+ const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i];
std::unique_ptr<armnn::ITensorHandle> inputHandle = subTensorsSupported ?
workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(),
@@ -1322,7 +1613,7 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory,
for (unsigned int i = 0; i < inputCount; ++i)
{
- AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos.begin()[i], inputHandles[i].get());
+ AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get());
}
AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());
@@ -1339,12 +1630,25 @@ void Concatenate(armnn::IWorkloadFactory& workloadFactory,
unsigned int nextInputId = 0;
for (auto& inputHandle : inputHandles)
{
- CopyDataToITensorHandle(inputHandle.get(), *(inputs.begin() + nextInputId++));
+ CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]);
+ ++nextInputId;
}
+ workloadFactory.Finalize();
workload->Execute();
- CopyDataFromITensorHandle(output, outputHandle.get());
+ if (needPermuteForConcat)
+ {
+ PermuteOutputForConcat<T>(workloadFactory,
+ outputTensorInfo,
+ permuteVector,
+ std::move(outputHandle),
+ output);
+ }
+ else
+ {
+ CopyDataFromITensorHandle(output, outputHandle.get());
+ }
}
template <typename T>
@@ -1362,7 +1666,7 @@ LayerTestResult<T, 1> Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadF
std::vector<T> output;
output.resize(outputTensorInfo.GetNumElements());
- Concatenate(workloadFactory,
+ Concatenate<T>(workloadFactory,
{ inputTensorInfo, inputTensorInfo, inputTensorInfo },
{ input0.data(), input1.data(), input2.data() },
outputTensorInfo,
@@ -1419,7 +1723,7 @@ LayerTestResult<T, 2> Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadF
std::vector<T> output;
output.resize(outputTensorInfo.GetNumElements());
- Concatenate(workloadFactory,
+ Concatenate<T>(workloadFactory,
{ inputTensorInfo, inputTensorInfo, inputTensorInfo },
{ input0.data(), input1.data(), input2.data() },
outputTensorInfo,
@@ -1524,7 +1828,7 @@ LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadF
std::vector<T> output;
output.resize(outputTensorInfo.GetNumElements());
- Concatenate(workloadFactory,
+ Concatenate<T>(workloadFactory,
{ input0TensorInfo, input1TensorInfo, input2TensorInfo },
{ input0.data(), input1.data(), input2.data() },
outputTensorInfo,
@@ -1596,7 +1900,7 @@ LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadF
std::vector<T> output;
output.resize(outputTensorInfo.GetNumElements());
- Concatenate(workloadFactory,
+ Concatenate<T>(workloadFactory,
{ input0TensorInfo, input1TensorInfo, input2TensorInfo },
{ input0.data(), input1.data(), input2.data() },
outputTensorInfo,
@@ -1693,7 +1997,7 @@ LayerTestResult<T, 3> Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadF
std::vector<T> output;
output.resize(outputTensorInfo.GetNumElements());
- Concatenate(workloadFactory,
+ Concatenate<T>(workloadFactory,
{ inputTensorInfo, inputTensorInfo, inputTensorInfo },
{ input0.data(), input1.data(), input2.data() },
outputTensorInfo,
@@ -1953,7 +2257,7 @@ LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadF
std::vector<T> output;
output.resize(outputTensorInfo.GetNumElements());
- Concatenate(workloadFactory,
+ Concatenate<T>(workloadFactory,
{ input0TensorInfo, input1TensorInfo, input2TensorInfo },
{ input0.data(), input1.data(), input2.data() },
outputTensorInfo,
@@ -2091,7 +2395,7 @@ LayerTestResult<T, 3> Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadF
std::vector<T> output;
output.resize(outputTensorInfo.GetNumElements());
- Concatenate(workloadFactory,
+ Concatenate<T>(workloadFactory,
{ input0TensorInfo, input1TensorInfo, input2TensorInfo },
{ input0.data(), input1.data(), input2.data() },
outputTensorInfo,
@@ -2229,7 +2533,7 @@ LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadF
std::vector<T> output;
output.resize(outputTensorInfo.GetNumElements());
- Concatenate(workloadFactory,
+ Concatenate<T>(workloadFactory,
{ input0TensorInfo, input1TensorInfo, input2TensorInfo },
{ input0.data(), input1.data(), input2.data() },
outputTensorInfo,
@@ -2306,6 +2610,7 @@ LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloa
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -2358,6 +2663,7 @@ LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& work
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -2408,6 +2714,7 @@ LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workl
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -2457,6 +2764,7 @@ LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloa
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -2507,6 +2815,7 @@ LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloa
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -2555,6 +2864,7 @@ LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workload
CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
@@ -2617,6 +2927,7 @@ LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloa
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -2686,6 +2997,7 @@ LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloa
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -2766,6 +3078,7 @@ LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloa
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -2929,6 +3242,7 @@ LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloa
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -3022,6 +3336,7 @@ LayerTestResult<T, 4> ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory,
outputHandle->Allocate();
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -3040,32 +3355,22 @@ LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadF
LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
- unsigned int outputWidth = 5;
+ unsigned int outputWidth = 3;
unsigned int outputHeight = 6;
unsigned int outputChannels = 3;
- unsigned int inputWidth1 = 2;
- unsigned int inputHeight1 = 2;
- unsigned int inputChannels1 = 3;
+ unsigned int inputWidth1 = 3;
+ unsigned int inputHeight1 = 6;
+ unsigned int inputChannels1 = 2;
- unsigned int inputWidth2 = 2;
- unsigned int inputHeight2 = 4;
- unsigned int inputChannels2 = 3;
-
- unsigned int inputWidth3 = 3;
- unsigned int inputHeight3 = 6;
- unsigned int inputChannels3 = 2;
-
- unsigned int inputWidth4 = 3;
- unsigned int inputHeight4 = 6;
- unsigned int inputChannels4 = 1;
+ unsigned int inputWidth2 = 3;
+ unsigned int inputHeight2 = 6;
+ unsigned int inputChannels2 = 1;
// Define the tensor descriptors
armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
- armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::QuantisedAsymm8);
- armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::QuantisedAsymm8);
// Arbitrary scale and offsets. They don't really matter as the merger operator doesn't dequantize/quantize
const float scale = 0.13497836f;
@@ -3077,113 +3382,69 @@ LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFac
inputTensorInfo1.SetQuantizationOffset(offset);
inputTensorInfo2.SetQuantizationScale(scale);
inputTensorInfo2.SetQuantizationOffset(offset);
- inputTensorInfo3.SetQuantizationScale(scale);
- inputTensorInfo3.SetQuantizationOffset(offset);
- inputTensorInfo4.SetQuantizationScale(scale);
- inputTensorInfo4.SetQuantizationOffset(offset);
LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
- {
- 1, 2, 3, 4, 5,
- 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20,
- 21, 22, 23, 24, 25,
- 26, 27, 28, 29, 30,
-
- 31, 32, 33, 34, 35,
- 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45,
- 46, 47, 48, 49, 50,
- 51, 52, 53, 54, 55,
- 56, 57, 58, 59, 60,
-
- 61, 62, 63, 64, 65,
- 66, 67, 68, 69, 70,
- 71, 72, 73, 74, 75,
- 76, 77, 78, 79, 80,
- 81, 82, 83, 84, 85,
- 86, 87, 88, 89, 90,
- })
+ {
+ 1, 2, 3,
+ 4, 5, 6,
+ 7, 8, 9,
+ 10, 11, 12,
+ 13, 14, 15,
+ 16, 17, 18,
+
+ 19, 20, 21,
+ 22, 23, 24,
+ 25, 26, 27,
+ 28, 29, 30,
+ 31, 32, 33,
+ 34, 35, 36,
+
+ 37, 38, 39,
+ 40, 41, 42,
+ 43, 44, 45,
+ 46, 47, 48,
+ 49, 50, 51,
+ 52, 53, 54,
+ })
);
-
auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
{
- 1, 2,
- 6, 7,
-
- 31, 32,
- 36, 37,
+ 1, 2, 3,
+ 4, 5, 6,
+ 7, 8, 9,
+ 10, 11, 12,
+ 13, 14, 15,
+ 16, 17, 18,
- 61, 62,
- 66, 67,
+ 19, 20, 21,
+ 22, 23, 24,
+ 25, 26, 27,
+ 28, 29, 30,
+ 31, 32, 33,
+ 34, 35, 36,
})
);
auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
{
- 11, 12,
- 16, 17,
- 21, 22,
- 26, 27,
-
- 41, 42,
- 46, 47,
- 51, 52,
- 56, 57,
-
- 71, 72,
- 76, 77,
- 81, 82,
- 86, 87,
- })
- );
-
- auto input3 = MakeTensor<uint8_t, 3>(inputTensorInfo3, std::vector<uint8_t>(
- {
- 3, 4, 5,
- 8, 9, 10,
- 13, 14, 15,
- 18, 19, 20,
- 23, 24, 25,
- 28, 29, 30,
-
- 33, 34, 35,
- 38, 39, 40,
+ 37, 38, 39,
+ 40, 41, 42,
43, 44, 45,
- 48, 49, 50,
- 53, 54, 55,
- 58, 59, 60,
- })
- );
-
-
- auto input4 = MakeTensor<uint8_t, 3>(inputTensorInfo4, std::vector<uint8_t>(
- {
- 63, 64, 65,
- 68, 69, 70,
- 73, 74, 75,
- 78, 79, 80,
- 83, 84, 85,
- 88, 89, 90,
+ 46, 47, 48,
+ 49, 50, 51,
+ 52, 53, 54,
})
);
std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //extent of the window is defined by size of input[0]
armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1);
- std::vector<unsigned int> wOrigin2 = { 0, 2, 0 }; //extent of the window is defined by size of input[1]
+ std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //extent of the window is defined by size of input[1]
armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2);
- std::vector<unsigned int> wOrigin3 = { 0, 0, 2 }; //extent of the window is defined by size of input[2]
- armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3);
-
- std::vector<unsigned int> wOrigin4 = { 2, 0, 2 }; //extent of the window is defined by size of input[3]
- armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4);
-
std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
@@ -3199,43 +3460,26 @@ LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFac
workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
workloadFactory.CreateTensorHandle(inputTensorInfo2);
- std::unique_ptr<armnn::ITensorHandle> inputHandle3 =
- subTensorsSupported ?
- workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) :
- workloadFactory.CreateTensorHandle(inputTensorInfo3);
-
- std::unique_ptr<armnn::ITensorHandle> inputHandle4 =
- subTensorsSupported ?
- workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) :
- workloadFactory.CreateTensorHandle(inputTensorInfo4);
-
armnn::MergerQueueDescriptor data;
armnn::WorkloadInfo info;
AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
- AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get());
- AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get());
AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
data.m_ViewOrigins.push_back(window1);
data.m_ViewOrigins.push_back(window2);
- data.m_ViewOrigins.push_back(window3);
- data.m_ViewOrigins.push_back(window4);
std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info);
inputHandle1->Allocate();
inputHandle2->Allocate();
- inputHandle3->Allocate();
- inputHandle4->Allocate();
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
- CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]);
- CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
@@ -3310,6 +3554,7 @@ LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadF
CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -3371,6 +3616,7 @@ LayerTestResult<uint8_t, 4> MultiplicationUint8TestHelper(armnn::IWorkloadFactor
CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -3531,6 +3777,7 @@ LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory&
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -3588,6 +3835,7 @@ LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactor
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -3643,6 +3891,7 @@ LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -3696,6 +3945,7 @@ LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory&
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
@@ -3751,6 +4001,7 @@ LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory&
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
diff --git a/src/armnn/backends/test/LayerTests.hpp b/src/armnn/backends/test/LayerTests.hpp
index 36e73e461c..2d543d61de 100644
--- a/src/armnn/backends/test/LayerTests.hpp
+++ b/src/armnn/backends/test/LayerTests.hpp
@@ -67,6 +67,9 @@ LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& wo
LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory,
bool biasEnabled);
+LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(armnn::IWorkloadFactory& workloadFactory,
+ bool biasEnabled);
+
LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
bool forceNoPadding);
LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory,
diff --git a/src/armnn/backends/test/MemCopyTests.cpp b/src/armnn/backends/test/MemCopyTests.cpp
index 8e4dae35f2..32331789e9 100644
--- a/src/armnn/backends/test/MemCopyTests.cpp
+++ b/src/armnn/backends/test/MemCopyTests.cpp
@@ -24,7 +24,7 @@ BOOST_AUTO_TEST_SUITE(MemCopyTestSuite)
void MemCopyTest(armnn::IWorkloadFactory& srcWorkloadFactory, armnn::IWorkloadFactory& dstWorkloadFactory,
bool withSubtensors)
{
- const std::array<unsigned int, 4> shapeData = { 1u, 1u, 6u, 5u };
+ const std::array<unsigned int, 4> shapeData = { { 1u, 1u, 6u, 5u } };
const armnn::TensorShape tensorShape(4, shapeData.data());
const armnn::TensorInfo tensorInfo(tensorShape, armnn::DataType::Float32);
boost::multi_array<float, 4> inputData = MakeTensor<float, 4>(tensorInfo, std::vector<float>(
diff --git a/src/armnn/backends/test/NormTestImpl.hpp b/src/armnn/backends/test/NormTestImpl.hpp
index 1f6aadc9df..d9dc01592a 100644
--- a/src/armnn/backends/test/NormTestImpl.hpp
+++ b/src/armnn/backends/test/NormTestImpl.hpp
@@ -71,6 +71,7 @@ LayerTestResult<float,4> SimpleNormalizationTestImpl(armnn::IWorkloadFactory& wo
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
@@ -227,7 +228,9 @@ LayerTestResult<float,4> CompareNormalizationTestImpl(armnn::IWorkloadFactory& w
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);
+ workloadFactory.Finalize();
workload->Execute();
+ refWorkloadFactory.Finalize();
workloadRef->Execute();
CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
diff --git a/src/armnn/backends/test/Reference.cpp b/src/armnn/backends/test/Reference.cpp
index 89e5db8e43..b60483a4d9 100644
--- a/src/armnn/backends/test/Reference.cpp
+++ b/src/armnn/backends/test/Reference.cpp
@@ -49,6 +49,9 @@ ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2
ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false)
ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, true)
+ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dAsymmetric, DepthwiseConvolution2dAsymmetricTest, false)
+
// Pooling
ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2, SimpleMaxPooling2dSize2x2Stride2x2Test, false)
ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize2x2Stride2x2Uint8, SimpleMaxPooling2dSize2x2Stride2x2Uint8Test, false)
diff --git a/src/armnn/backends/test/SoftmaxTestImpl.hpp b/src/armnn/backends/test/SoftmaxTestImpl.hpp
index 5aa74f9618..4c3e0b73dd 100644
--- a/src/armnn/backends/test/SoftmaxTestImpl.hpp
+++ b/src/armnn/backends/test/SoftmaxTestImpl.hpp
@@ -62,6 +62,7 @@ LayerTestResult<T, 2> SimpleSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFac
outputHandle->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
+ workloadFactory.Finalize();
workload->Execute();
CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
@@ -140,11 +141,13 @@ LayerTestResult<T, 2> CompareSoftmaxTestImpl(armnn::IWorkloadFactory& workloadFa
CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0]);
+ workloadFactory.Finalize();
workload->Execute();
+ refWorkloadFactory.Finalize();
workloadRef->Execute();
CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
CopyDataFromITensorHandle(&ret.outputExpected[0][0], outputHandleRef.get());
return ret;
-} \ No newline at end of file
+}
diff --git a/src/armnn/backends/test/SplitterTestImpl.hpp b/src/armnn/backends/test/SplitterTestImpl.hpp
index b72046e4bc..70b798eafa 100644
--- a/src/armnn/backends/test/SplitterTestImpl.hpp
+++ b/src/armnn/backends/test/SplitterTestImpl.hpp
@@ -25,31 +25,34 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo
unsigned int inputHeight = 6;
unsigned int inputChannels = 3;
- unsigned int outputWidth1 = 2;
- unsigned int outputHeight1 = 2;
- unsigned int outputChannels1 = 3;
+ // NOTE: Compute Library imposes a restriction that the x and y dimensions (input height and width)
+ // cannot be split.
+ // For the reasons behind this, see the first comment on https://jira.arm.com/browse/IVGCVSW-1239
+ //
+ // This test has therefore been recast to split the channels, then split the resulting subtensor:
- unsigned int outputWidth2 = 2;
- unsigned int outputHeight2 = 4;
- unsigned int outputChannels2 = 3;
+ // output 1 takes channel 0 of the original input,
+ // outputs 3 and 4 take channels 0 and 1 of the split subtensor
+ unsigned int outputWidth1 = inputWidth;
+ unsigned int outputHeight1 = inputHeight;
+ unsigned int outputChannels1 = 1;
- unsigned int outputWidth3 = 3;
- unsigned int outputHeight3 = 6;
- unsigned int outputChannels3 = 2;
-
- unsigned int outputWidth4 = 3;
- unsigned int outputHeight4 = 6;
- unsigned int outputChannels4 = 1;
+ // output 2 takes channels 1 and 2 of the original input
+ unsigned int outputWidth2 = inputWidth;
+ unsigned int outputHeight2 = inputHeight;
+ unsigned int outputChannels2 = 2;
// Define the tensor descriptors
armnn::TensorInfo inputTensorInfo({ inputChannels, inputHeight, inputWidth }, armnn::GetDataType<T>());
+
+ // outputs of the original split
armnn::TensorInfo outputTensorInfo1({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>());
armnn::TensorInfo outputTensorInfo2({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType<T>());
- armnn::TensorInfo outputTensorInfo3({ outputChannels3, outputHeight3, outputWidth3 }, armnn::GetDataType<T>());
- armnn::TensorInfo outputTensorInfo4({ outputChannels4, outputHeight4, outputWidth4 }, armnn::GetDataType<T>());
- // note that output 5 should match output 2
- armnn::TensorInfo outputTensorInfo5({ outputChannels2, outputHeight2, outputWidth2 }, armnn::GetDataType<T>());
+
+ // outputs of the subsequent subtensor split
+ armnn::TensorInfo outputTensorInfo3({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>());
+ armnn::TensorInfo outputTensorInfo4({ outputChannels1, outputHeight1, outputWidth1 }, armnn::GetDataType<T>());
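+
+ // With the 3-channel input this gives output1 = { 1, inputHeight, inputWidth } (channel 0),
+ // output2 = { 2, inputHeight, inputWidth } (channels 1 and 2), and the second split of output2
+ // yields two further { 1, inputHeight, inputWidth } views.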
// Set quantization parameters if the requested type is a quantized type.
// The quantization doesn't really matter as the splitter operator doesn't dequantize/quantize
@@ -65,15 +68,12 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo
outputTensorInfo3.SetQuantizationOffset(qOffset);
outputTensorInfo4.SetQuantizationScale(qScale);
outputTensorInfo4.SetQuantizationOffset(qOffset);
- outputTensorInfo5.SetQuantizationScale(qScale);
- outputTensorInfo5.SetQuantizationOffset(qOffset);
}
LayerTestResult<T,3> ret1(outputTensorInfo1);
LayerTestResult<T,3> ret2(outputTensorInfo2);
LayerTestResult<T,3> ret3(outputTensorInfo3);
LayerTestResult<T,3> ret4(outputTensorInfo4);
- LayerTestResult<T,3> ret5(outputTensorInfo5);
auto input = MakeTensor<T, 3>(inputTensorInfo, std::vector<T>(
QuantizedVector<T>(qScale, qOffset, {
@@ -100,98 +100,74 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo
})
));
-
+ // channel 0 of the original input
ret1.outputExpected = MakeTensor<T, 3>(outputTensorInfo1, std::vector<T>(
QuantizedVector<T>(qScale, qOffset, {
- 1.0f, 2.0f,
- 6.0f, 7.0f,
-
- 31.0f, 32.0f,
- 36.0f, 37.0f,
-
- 61.0f, 62.0f,
- 66.0f, 67.0f,
+ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
+ 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
+ 16.0f, 17.0f, 18.0f, 19.0f, 20.0f,
+ 21.0f, 22.0f, 23.0f, 24.0f, 25.0f,
+ 26.0f, 27.0f, 28.0f, 29.0f, 30.0f,
})
));
+ // channels 1 & 2 of the original input
ret2.outputExpected = MakeTensor<T, 3>(outputTensorInfo2, std::vector<T>(
QuantizedVector<T>(qScale, qOffset, {
- 11.0f, 12.0f,
- 16.0f, 17.0f,
- 21.0f, 22.0f,
- 26.0f, 27.0f,
-
- 41.0f, 42.0f,
- 46.0f, 47.0f,
- 51.0f, 52.0f,
- 56.0f, 57.0f,
-
- 71.0f, 72.0f,
- 76.0f, 77.0f,
- 81.0f, 82.0f,
- 86.0f, 87.0f,
+ 31.0f, 32.0f, 33.0f, 34.0f, 35.0f,
+ 36.0f, 37.0f, 38.0f, 39.0f, 40.0f,
+ 41.0f, 42.0f, 43.0f, 44.0f, 45.0f,
+ 46.0f, 47.0f, 48.0f, 49.0f, 50.0f,
+ 51.0f, 52.0f, 53.0f, 54.0f, 55.0f,
+ 56.0f, 57.0f, 58.0f, 59.0f, 60.0f,
+
+ 61.0f, 62.0f, 63.0f, 64.0f, 65.0f,
+ 66.0f, 67.0f, 68.0f, 69.0f, 70.0f,
+ 71.0f, 72.0f, 73.0f, 74.0f, 75.0f,
+ 76.0f, 77.0f, 78.0f, 79.0f, 80.0f,
+ 81.0f, 82.0f, 83.0f, 84.0f, 85.0f,
+ 86.0f, 87.0f, 88.0f, 89.0f, 90.0f,
})
));
+ // channel 0 of ret2 (i.e. channel 1 of the original input)
ret3.outputExpected = MakeTensor<T, 3>(outputTensorInfo3, std::vector<T>(
QuantizedVector<T>(qScale, qOffset, {
- 3.0f, 4.0f, 5.0f,
- 8.0f, 9.0f, 10.0f,
- 13.0f, 14.0f, 15.0f,
- 18.0f, 19.0f, 20.0f,
- 23.0f, 24.0f, 25.0f,
- 28.0f, 29.0f, 30.0f,
-
- 33.0f, 34.0f, 35.0f,
- 38.0f, 39.0f, 40.0f,
- 43.0f, 44.0f, 45.0f,
- 48.0f, 49.0f, 50.0f,
- 53.0f, 54.0f, 55.0f,
- 58.0f, 59.0f, 60.0f,
+ 31.0f, 32.0f, 33.0f, 34.0f, 35.0f,
+ 36.0f, 37.0f, 38.0f, 39.0f, 40.0f,
+ 41.0f, 42.0f, 43.0f, 44.0f, 45.0f,
+ 46.0f, 47.0f, 48.0f, 49.0f, 50.0f,
+ 51.0f, 52.0f, 53.0f, 54.0f, 55.0f,
+ 56.0f, 57.0f, 58.0f, 59.0f, 60.0f,
})
));
+ // channel 1 of ret2 (i.e. channel 2 of the original input)
ret4.outputExpected = MakeTensor<T, 3>(outputTensorInfo4, std::vector<T>(
QuantizedVector<T>(qScale, qOffset, {
- 63.0f, 64.0f, 65.0f,
- 68.0f, 69.0f, 70.0f,
- 73.0f, 74.0f, 75.0f,
- 78.0f, 79.0f, 80.0f,
- 83.0f, 84.0f, 85.0f,
- 88.0f, 89.0f, 90.0f,
- })
- ));
-
-
- ret5.outputExpected = MakeTensor<T, 3>(outputTensorInfo5, std::vector<T>(
- QuantizedVector<T>(qScale, qOffset, {
- 11.0f, 12.0f,
- 16.0f, 17.0f,
- 21.0f, 22.0f,
- 26.0f, 27.0f,
-
- 41.0f, 42.0f,
- 46.0f, 47.0f,
- 51.0f, 52.0f,
- 56.0f, 57.0f,
-
- 71.0f, 72.0f,
- 76.0f, 77.0f,
- 81.0f, 82.0f,
- 86.0f, 87.0f,
+ 61.0f, 62.0f, 63.0f, 64.0f, 65.0f,
+ 66.0f, 67.0f, 68.0f, 69.0f, 70.0f,
+ 71.0f, 72.0f, 73.0f, 74.0f, 75.0f,
+ 76.0f, 77.0f, 78.0f, 79.0f, 80.0f,
+ 81.0f, 82.0f, 83.0f, 84.0f, 85.0f,
+ 86.0f, 87.0f, 88.0f, 89.0f, 90.0f,
})
));
+ // NOTE: as a corollary of the "no splitting in x and y" restriction, the x and y values of the view
+ // origins have to be zero; the coordinates follow the tensor info above: channels, height/y, width/x.
+ // Note that under the hood the compute engine reverses these, i.e. its coordinate system is x, y, channels.
std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //extent of the window is defined by size of output[0]
armnn::SplitterQueueDescriptor::ViewOrigin window1(wOrigin1);
- std::vector<unsigned int> wOrigin2 = {0, 2, 0}; //extent of the window is defined by size of output[1]
+ std::vector<unsigned int> wOrigin2 = {1, 0, 0}; //extent of the window is defined by size of output[1]
armnn::SplitterQueueDescriptor::ViewOrigin window2(wOrigin2);
- std::vector<unsigned int> wOrigin3 = {0, 0, 2}; //extent of the window is defined by size of output[2]
+ std::vector<unsigned int> wOrigin3 = {0, 0, 0}; //extent of the window is defined by size of output[2]
armnn::SplitterQueueDescriptor::ViewOrigin window3(wOrigin3);
- std::vector<unsigned int> wOrigin4 = {2, 0, 2}; //extent of the window is defined by size of output[3]
+ std::vector<unsigned int> wOrigin4 = {1, 0, 0}; //extent of the window is defined by size of output[3]
armnn::SplitterQueueDescriptor::ViewOrigin window4(wOrigin4);
bool subTensorsSupported = workloadFactory.SupportsSubTensors();
@@ -210,43 +186,29 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo
std::unique_ptr<armnn::ITensorHandle> outputHandle3 =
subTensorsSupported ?
- workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo3.GetShape(), wOrigin3.data()) :
+ workloadFactory.CreateSubTensorHandle(*outputHandle2, outputTensorInfo3.GetShape(), wOrigin3.data()) :
workloadFactory.CreateTensorHandle(outputTensorInfo3);
std::unique_ptr<armnn::ITensorHandle> outputHandle4 =
subTensorsSupported ?
- workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo4.GetShape(), wOrigin4.data()) :
+ workloadFactory.CreateSubTensorHandle(*outputHandle2, outputTensorInfo4.GetShape(), wOrigin4.data()) :
workloadFactory.CreateTensorHandle(outputTensorInfo4);
- std::unique_ptr<armnn::ITensorHandle> outputHandle5 =
- subTensorsSupported ?
- workloadFactory.CreateSubTensorHandle(*inputHandle, outputTensorInfo5.GetShape(), wOrigin2.data()) :
- workloadFactory.CreateTensorHandle(outputTensorInfo5);
-
+ // Do the first split
armnn::SplitterQueueDescriptor data;
armnn::WorkloadInfo info;
AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
AddOutputToWorkload(data, info, outputTensorInfo1, outputHandle1.get());
AddOutputToWorkload(data, info, outputTensorInfo2, outputHandle2.get());
- AddOutputToWorkload(data, info, outputTensorInfo3, outputHandle3.get());
- AddOutputToWorkload(data, info, outputTensorInfo4, outputHandle4.get());
- AddOutputToWorkload(data, info, outputTensorInfo5, outputHandle5.get());
data.m_ViewOrigins.push_back(window1);
data.m_ViewOrigins.push_back(window2);
- data.m_ViewOrigins.push_back(window3);
- data.m_ViewOrigins.push_back(window4);
- //add window2 again (to have an overlapping split)
- data.m_ViewOrigins.push_back(window2);
std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSplitter(data, info);
inputHandle->Allocate();
outputHandle1->Allocate();
outputHandle2->Allocate();
- outputHandle3->Allocate();
- outputHandle4->Allocate();
- outputHandle5->Allocate();
CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0]);
@@ -254,11 +216,28 @@ std::vector<LayerTestResult<T,3>> SplitterTestCommon(armnn::IWorkloadFactory& wo
CopyDataFromITensorHandle(&ret1.output[0][0][0], outputHandle1.get());
CopyDataFromITensorHandle(&ret2.output[0][0][0], outputHandle2.get());
+
+ // Do the second split
+ armnn::SplitterQueueDescriptor data2;
+ armnn::WorkloadInfo info2;
+ AddInputToWorkload(data2, info2, outputTensorInfo2, outputHandle2.get());
+ AddOutputToWorkload(data2, info2, outputTensorInfo3, outputHandle3.get());
+ AddOutputToWorkload(data2, info2, outputTensorInfo4, outputHandle4.get());
+
+ data2.m_ViewOrigins.push_back(window3);
+ data2.m_ViewOrigins.push_back(window4);
+
+ std::unique_ptr<armnn::IWorkload> workload2 = workloadFactory.CreateSplitter(data2, info2);
+
+ outputHandle3->Allocate();
+ outputHandle4->Allocate();
+
+ workload2->Execute();
+
CopyDataFromITensorHandle(&ret3.output[0][0][0], outputHandle3.get());
CopyDataFromITensorHandle(&ret4.output[0][0][0], outputHandle4.get());
- CopyDataFromITensorHandle(&ret5.output[0][0][0], outputHandle5.get());
- std::vector<LayerTestResult<T,3>> ret = {ret1, ret2, ret3, ret4, ret5};
+ std::vector<LayerTestResult<T,3>> ret = {ret1, ret2, ret3, ret4};
return ret;
}