author     Nikhil Raj <nikhil.raj@arm.com>            2018-10-12 13:51:57 +0100
committer  Matthew Bentham <matthew.bentham@arm.com>  2018-10-22 16:57:53 +0100
commit     cec6b655d9f6ddb73c721ca47a7d67eeaad5c043 (patch)
tree       898dbc90785f77f9c724bb8de61d7fb1b1ce3c0b
parent     f3eb46d23c6001150d36d80acac7ad1247174630 (diff)
download   armnn-cec6b655d9f6ddb73c721ca47a7d67eeaad5c043.tar.gz
IVGCVSW-1922 Unit test for DepthwiseConvolution with NHWC
Change-Id: I3e6e5b9a62f30d03c05bd7178adea8f4c8275da8
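This change threads the data layout through DepthwiseConvolution2dDescriptor::m_DataLayout (dropping the duplicate field on the queue descriptor) and adds NHWC unit-test coverage across the CL, NEON and reference backends. A minimal sketch of the shape convention the new tests rely on, assuming only armnn::TensorShape and armnn::DataLayout from the public headers:

    // Sketch (not part of the patch): the same 2-batch, 2-channel, 5x5 tensor
    // expressed in both layouts. NCHW keeps channels at index 1, NHWC at index 3.
    armnn::TensorShape MakeShape(armnn::DataLayout layout)
    {
        return (layout == armnn::DataLayout::NCHW)
            ? armnn::TensorShape({ 2, 2, 5, 5 })   // N, C, H, W
            : armnn::TensorShape({ 2, 5, 5, 2 });  // N, H, W, C
    }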
 src/armnn/layers/DepthwiseConvolution2dLayer.cpp                      |   2
 src/armnn/test/CreateWorkload.hpp                                     |  46
 src/backends/WorkloadData.cpp                                         |   6
 src/backends/WorkloadData.hpp                                         |   2
 src/backends/cl/test/ClCreateWorkloadTests.cpp                        |  30
 src/backends/cl/test/ClLayerTests.cpp (mode 100644 -> 100755)         |   1
 src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp          |   8
 src/backends/neon/test/NeonCreateWorkloadTests.cpp                    |  39
 src/backends/neon/test/NeonLayerTests.cpp                             |   1
 src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp |   8
 src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp |   8
 src/backends/reference/test/RefCreateWorkloadTests.cpp                |  13
 src/backends/test/Conv2dTestImpl.hpp (mode 100644 -> 100755)          | 100
 src/backends/test/LayerTests.cpp (mode 100644 -> 100755)              | 119
 src/backends/test/LayerTests.hpp                                      |   3
 15 files changed, 340 insertions(+), 46 deletions(-)
diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
index e1d433c640..393c4bf6f2 100644
--- a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
+++ b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp
@@ -29,8 +29,6 @@ std::unique_ptr<IWorkload> DepthwiseConvolution2dLayer::CreateWorkload(const Gra
descriptor.m_Weight = m_Weight.get();
- descriptor.m_DataLayout = GetParameters().m_DataLayout;
-
if (m_Param.m_BiasEnabled)
{
BOOST_ASSERT_MSG(m_Bias != nullptr, "DepthwiseConvolution2dLayer: Bias data should not be null.");
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index a33189efeb..f3cf544fa3 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -397,52 +397,56 @@ std::unique_ptr<Convolution2dWorkload> CreateDirectConvolution2dWorkloadTest(arm
return workload;
}
-template <typename DepthwiseConvolution2dFloat32Workload>
+template <typename DepthwiseConvolution2dFloat32Workload, armnn::DataType DataType>
std::unique_ptr<DepthwiseConvolution2dFloat32Workload> CreateDepthwiseConvolution2dWorkloadTest(
- armnn::IWorkloadFactory& factory, armnn::Graph& graph)
+ armnn::IWorkloadFactory& factory, armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
{
// Creates the layer we're testing.
DepthwiseConvolution2dDescriptor layerDesc;
- layerDesc.m_PadLeft = 3;
- layerDesc.m_PadRight = 3;
+ layerDesc.m_PadLeft = 1;
+ layerDesc.m_PadRight = 2;
layerDesc.m_PadTop = 1;
- layerDesc.m_PadBottom = 1;
- layerDesc.m_StrideX = 2;
- layerDesc.m_StrideY = 4;
- layerDesc.m_BiasEnabled = true;
+ layerDesc.m_PadBottom = 2;
+ layerDesc.m_StrideX = 1;
+ layerDesc.m_StrideY = 1;
+ layerDesc.m_BiasEnabled = false;
+ layerDesc.m_DataLayout = dataLayout;
DepthwiseConvolution2dLayer* const layer = graph.AddLayer<DepthwiseConvolution2dLayer>(layerDesc, "layer");
- layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({3, 3, 5, 3}, DataType::Float32));
- layer->m_Bias = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({9}, DataType::Float32));
+ layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({1, 4, 4, 2}, DataType));
layer->m_Weight->Allocate();
- layer->m_Bias->Allocate();
// Creates extra layers.
Layer* const input = graph.AddLayer<InputLayer>(0, "input");
Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
+ TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
+ TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
+ TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
+ TensorShape{ 2, 2, 5, 5 } : TensorShape{ 2, 5, 5, 2 };
+
// Connects up.
- Connect(input, layer, TensorInfo({2, 3, 8, 16}, armnn::DataType::Float32));
- Connect(layer, output, TensorInfo({2, 9, 2, 10}, armnn::DataType::Float32));
+ Connect(input, layer, TensorInfo(inputShape, DataType));
+ Connect(layer, output, TensorInfo(outputShape, DataType));
CreateTensorHandles(graph, factory);
// Makes the workload and checks it.
auto workload = MakeAndCheckWorkload<DepthwiseConvolution2dFloat32Workload>(*layer, graph, factory);
DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
- BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2);
- BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 4);
- BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 3);
- BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 3);
+ BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 1);
+ BOOST_TEST(queueDescriptor.m_Parameters.m_StrideY == 1);
+ BOOST_TEST(queueDescriptor.m_Parameters.m_PadLeft == 1);
+ BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 2);
BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1);
- BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1);
- BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true);
+ BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 2);
+ BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == false);
+ BOOST_TEST((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
- BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({3, 3, 5, 3}, DataType::Float32)));
- BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({9}, DataType::Float32)));
+ BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({1, 4, 4, 2}, DataType)));
// Returns so we can do extra, backend-specific tests.
return workload;
diff --git a/src/backends/WorkloadData.cpp b/src/backends/WorkloadData.cpp
index d562b73053..ef31fbd1fb 100644
--- a/src/backends/WorkloadData.cpp
+++ b/src/backends/WorkloadData.cpp
@@ -579,10 +579,12 @@ void DepthwiseConvolution2dQueueDescriptor::Validate(const WorkloadInfo& workloa
ValidatePointer(m_Weight, "DepthwiseConvolution2dQueueDescriptor", "weight");
ValidateTensorNumDimensions(m_Weight->GetTensorInfo(), "DepthwiseConvolution2dQueueDescriptor", 4, "weight");
+ const unsigned int channelIndex = (m_Parameters.m_DataLayout == DataLayout::NCHW) ? 1 : 3;
+
//inputChannels * channelMultiplier should be equal to outputChannels.
const unsigned int numWeightChannelMultiplier = m_Weight->GetTensorInfo().GetShape()[0];
- const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[1];
- const unsigned int numWeightOutputChannels = workloadInfo.m_OutputTensorInfos[0].GetShape()[1];
+ const unsigned int numWeightInputChannels = m_Weight->GetTensorInfo().GetShape()[channelIndex];
+ const unsigned int numWeightOutputChannels = workloadInfo.m_OutputTensorInfos[0].GetShape()[channelIndex];
if (numWeightChannelMultiplier * numWeightInputChannels != numWeightOutputChannels)
{
throw InvalidArgumentException(
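The validation above now selects the channel dimension by layout before checking that inputChannels * channelMultiplier equals outputChannels. In isolation, a hedged sketch of the index selection:

    // Sketch: channel dimension index by layout (NCHW -> 1, NHWC -> 3).
    unsigned int ChannelIndex(armnn::DataLayout layout)
    {
        return (layout == armnn::DataLayout::NCHW) ? 1u : 3u;
    }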
diff --git a/src/backends/WorkloadData.hpp b/src/backends/WorkloadData.hpp
index c7777b0eed..40e89f76e4 100644
--- a/src/backends/WorkloadData.hpp
+++ b/src/backends/WorkloadData.hpp
@@ -160,13 +160,11 @@ struct DepthwiseConvolution2dQueueDescriptor : QueueDescriptorWithParameters<Dep
DepthwiseConvolution2dQueueDescriptor()
: m_Weight(nullptr)
, m_Bias(nullptr)
- , m_DataLayout(DataLayout::NCHW)
{
}
const ConstCpuTensorHandle* m_Weight;
const ConstCpuTensorHandle* m_Bias;
- DataLayout m_DataLayout;
void Validate(const WorkloadInfo& workloadInfo) const;
};
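With the duplicate member removed from the queue descriptor, the layout is read from the shared descriptor parameters wherever a workload needs it; for example (sketch, not from the patch, with queueDescriptor assumed in scope):

    // queueDescriptor is a DepthwiseConvolution2dQueueDescriptor;
    // its layout now lives on m_Parameters, as the CL/NEON changes below use.
    armnn::DataLayout layout = queueDescriptor.m_Parameters.m_DataLayout;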
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 67f3e3c5bb..66c2c2aa40 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -243,6 +243,36 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
+template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
+static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
+{
+ Graph graph;
+ ClWorkloadFactory factory;
+
+ auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
+ (factory, graph, dataLayout);
+
+ // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
+ DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
+ auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+ auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+
+ std::initializer_list<unsigned int> inputShape = (dataLayout == DataLayout::NCHW)
+ ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
+ : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
+ std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW)
+ ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
+ : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
+
+ BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
+ BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
+}
+
+BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NhwcWorkload)
+{
+ ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
+}
+
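Since the helper is parameterised on layout, an NCHW companion case would be a one-liner; a hypothetical sketch (this case is not in the patch):

    BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NchwWorkload)
    {
        ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NCHW);
    }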
template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
static void ClDirectConvolution2dWorkloadTest()
{
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index cea7470b76..9934c927e6 100644..100755
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -65,6 +65,7 @@ ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquareNhwc, SimpleConvolution2d3x3NhwcTe
// Depthwise Convolution
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthNhwc, DepthwiseConvolution2dDepthNhwcTest, false)
ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false)
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true)
ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false)
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 142cbc230f..6fa9ddc6b0 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -60,12 +60,12 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
- BuildArmComputeTensor(*m_KernelTensor, weightInfo);
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
if (m_Data.m_Parameters.m_BiasEnabled)
{
m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
- BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
}
arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
@@ -82,6 +82,10 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
+ input.info()->set_data_layout(aclDataLayout);
+ output.info()->set_data_layout(aclDataLayout);
+
const unsigned int depthMultiplier = weightInfo.GetShape()[0];
//Check for optimisation opportunities.
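Tagging the ACL tensor infos with the converted layout is what lets arm_compute select NHWC kernels. The helper name ConvertDataLayout is taken from the patch; a plausible sketch of the mapping it performs (assumed behaviour, not the backend's actual implementation):

    // Sketch of the armnn -> Arm Compute Library layout mapping.
    arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout layout)
    {
        return (layout == armnn::DataLayout::NHWC) ? arm_compute::DataLayout::NHWC
                                                   : arm_compute::DataLayout::NCHW;
    }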
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 244002f132..ac0451f11b 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -219,6 +219,45 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
NeonCreateConvolution2dWorkloadTest<NeonConvolution2dFloatWorkload, DataType::Float32>(DataLayout::NHWC);
}
+template <typename DepthwiseConvolution2dFloat32WorkloadType, typename armnn::DataType DataType>
+static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
+{
+ Graph graph;
+ NeonWorkloadFactory factory;
+
+ auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolution2dFloat32WorkloadType,
+ DataType>(factory, graph, dataLayout);
+
+ // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
+ DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
+ auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
+ auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
+
+ std::initializer_list<unsigned int> inputShape = (dataLayout == DataLayout::NCHW)
+ ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
+ : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
+ std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW)
+ ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
+ : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
+
+ BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
+ BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
+}
+
+BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload)
+{
+ NeonCreateDepthWiseConvolutionWorkloadTest<NeonDepthwiseConvolutionFloatWorkload,
+ DataType::Float32>(DataLayout::NHWC);
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload)
+{
+ NeonCreateDepthWiseConvolutionWorkloadTest<NeonDepthwiseConvolutionFloatWorkload,
+ DataType::Float16>(DataLayout::NHWC);
+}
+#endif
+
template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
static void NeonCreateFullyConnectedWorkloadTest()
{
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 2d4ee996a4..36138b3c3f 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -84,6 +84,7 @@ BOOST_AUTO_TEST_CASE(Conv2dUtils)
// Depthwise Convolution
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, true)
+ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthNhwc, DepthwiseConvolution2dDepthNhwcTest, false)
ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1, DepthwiseConvolution2dDepthMul1Test, false)
ARMNN_AUTO_TEST_CASE(DepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, true)
ARMNN_AUTO_TEST_CASE(UnbiasedDepthwiseConvolution2dDepthMul1Uint8, DepthwiseConvolution2dDepthMul1Uint8Test, false)
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp
index 742a768b94..4b266f3288 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp
@@ -20,12 +20,12 @@ NeonDepthwiseConvolutionFloatWorkload::NeonDepthwiseConvolutionFloatWorkload(
const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
m_KernelTensor = std::make_unique<arm_compute::Tensor>();
- BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout);
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
if (m_Data.m_Parameters.m_BiasEnabled)
{
m_BiasTensor = std::make_unique<arm_compute::Tensor>();
- BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout);
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
}
arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
@@ -41,6 +41,10 @@ NeonDepthwiseConvolutionFloatWorkload::NeonDepthwiseConvolutionFloatWorkload(
arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
+ input.info()->set_data_layout(aclDataLayout);
+ output.info()->set_data_layout(aclDataLayout);
+
bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3;
if (use3x3Optimisation)
{
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp
index 722b778eba..6c6c2dfb6c 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp
@@ -20,12 +20,12 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload(
const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
m_KernelTensor = std::make_unique<arm_compute::Tensor>();
- BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout);
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
if (m_Data.m_Parameters.m_BiasEnabled)
{
m_BiasTensor = std::make_unique<arm_compute::Tensor>();
- BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout);
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
}
arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
@@ -41,6 +41,10 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload(
arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
+ input.info()->set_data_layout(aclDataLayout);
+ output.info()->set_data_layout(aclDataLayout);
+
bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3;
if (use3x3Optimisation)
{
diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp
index e8d536f6e8..a8901d2cc5 100644
--- a/src/backends/reference/test/RefCreateWorkloadTests.cpp
+++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp
@@ -190,19 +190,6 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dWorkload)
TensorInfo({2, 2, 2, 10}, DataType::Float32));
}
-BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolution2dWorkload)
-{
- Graph graph;
- RefWorkloadFactory factory;
- auto workload =
- CreateDepthwiseConvolution2dWorkloadTest<RefDepthwiseConvolution2dFloat32Workload>(factory, graph);
-
- // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
- CheckInputOutput(std::move(workload),
- TensorInfo({2, 3, 8, 16}, DataType::Float32),
- TensorInfo({2, 9, 2, 10}, DataType::Float32));
-}
-
template <typename FullyConnectedWorkloadType, armnn::DataType DataType>
static void RefCreateFullyConnectedWorkloadTest()
{
diff --git a/src/backends/test/Conv2dTestImpl.hpp b/src/backends/test/Conv2dTestImpl.hpp
index 8e29615c47..d8c104007c 100644..100755
--- a/src/backends/test/Conv2dTestImpl.hpp
+++ b/src/backends/test/Conv2dTestImpl.hpp
@@ -691,6 +691,106 @@ LayerTestResult<T, 4> DepthwiseConvolution2dTestImpl(armnn::IWorkloadFactory& wo
return ret;
}
+template<typename T, typename B>
+LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestImpl(armnn::IWorkloadFactory& workloadFactory,
+ const boost::multi_array<T, 4>& input,
+ const boost::multi_array<T, 4>& kernel,
+ const boost::multi_array<B, 1>& bias,
+ const boost::multi_array<T, 4>& outputExpected,
+ float qScale,
+ int32_t qOffset,
+ uint32_t padLeft = 0,
+ uint32_t padTop = 0,
+ uint32_t padRight = 0,
+ uint32_t padBottom = 0,
+ uint32_t strideX = 1,
+ uint32_t strideY = 1)
+{
+ unsigned int inputNum = boost::numeric_cast<unsigned int>(input.shape()[0]);
+ unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[3]);
+ unsigned int inputHeight = boost::numeric_cast<unsigned int>(input.shape()[1]);
+ unsigned int inputWidth = boost::numeric_cast<unsigned int>(input.shape()[2]);
+
+ unsigned int kernelChanMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
+ unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
+ unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
+ unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
+
+ unsigned int outputNum = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
+ unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
+ unsigned int outputHeight = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
+ unsigned int outputWidth = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
+
+ // Creates the tensors.
+ armnn::TensorInfo inputTensorInfo({inputNum, inputHeight, inputWidth, inputChannels}, armnn::GetDataType<T>());
+ armnn::TensorInfo outputTensorInfo({outputNum, outputHeight, outputWidth, outputChannels},
+ armnn::GetDataType<T>());
+ armnn::TensorInfo kernelDesc({kernelChanMul, kernelHeight, kernelWidth, kernelChannels}, armnn::GetDataType<T>());
+ armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());
+
+ // Set quantization parameters if the requested type is a quantized type.
+ if (armnn::IsQuantizedType<T>())
+ {
+ inputTensorInfo.SetQuantizationScale(qScale);
+ inputTensorInfo.SetQuantizationOffset(qOffset);
+ outputTensorInfo.SetQuantizationScale(qScale);
+ outputTensorInfo.SetQuantizationOffset(qOffset);
+ kernelDesc.SetQuantizationScale(qScale);
+ kernelDesc.SetQuantizationOffset(qOffset);
+ biasDesc.SetQuantizationScale(qScale*qScale);
+ biasDesc.SetQuantizationOffset(0);
+ }
+
+ // Construct the input data.
+ std::vector<T> inputData;
+ inputData.assign(input.data(), input.data() + inputHeight*inputWidth*inputChannels);
+ auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
+
+ // Construct the output data, with bias applied, as appropriate.
+ std::vector<T> outputData;
+ outputData.assign(outputExpected.data(), outputExpected.data() + outputHeight*outputWidth*outputChannels);
+
+ LayerTestResult<T, 4> ret(outputTensorInfo);
+ ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
+
+ std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+ std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+ armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
+ AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
+
+ armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
+
+ armnn::DepthwiseConvolution2dQueueDescriptor data;
+ data.m_Weight = &weightsTensor;
+ data.m_Bias = &biasTensor; // Still set this whether or not bias is enabled - it can be a source of bugs.
+ data.m_Parameters.m_StrideX = strideX;
+ data.m_Parameters.m_StrideY = strideY;
+ data.m_Parameters.m_PadLeft = padLeft;
+ data.m_Parameters.m_PadRight = padRight;
+ data.m_Parameters.m_PadTop = padTop;
+ data.m_Parameters.m_PadBottom = padBottom;
+ data.m_Parameters.m_DataLayout = armnn::DataLayout::NHWC;
+
+ armnn::WorkloadInfo info;
+ AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+ AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+ std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthwiseConvolution2d(data, info);
+
+ inputHandle->Allocate();
+ outputHandle->Allocate();
+
+ CopyDataToITensorHandle(inputHandle.get(), &batchedInput[0][0][0][0]);
+
+ workloadFactory.Finalize();
+ workload->Execute();
+
+ CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+ return ret;
+}
+
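The NHWC test impl reads H, W and C from indices 1, 2 and 3 of the boost::multi_array shapes, mirroring how the data sits flat in memory. For reference, a small sketch of the NHWC flat offset:

    // Flat offset of element (n, h, w, c) in an NHWC tensor of shape {N, H, W, C}:
    // channels are contiguous, then width, then height, then batch.
    inline unsigned int NhwcOffset(unsigned int n, unsigned int h, unsigned int w,
                                   unsigned int c, unsigned int H, unsigned int W,
                                   unsigned int C)
    {
        return ((n * H + h) * W + w) * C + c;
    }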
template<typename T>
LayerTestResult<T,4> Convolution1dTestImpl(armnn::IWorkloadFactory& workloadFactory,
float qScale,
diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp
index d5f84f0920..f2bc0192ba 100644..100755
--- a/src/backends/test/LayerTests.cpp
+++ b/src/backends/test/LayerTests.cpp
@@ -493,6 +493,119 @@ LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(armnn::IWorkloa
1); // strideY
}
+template<typename T>
+LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(armnn::IWorkloadFactory& workloadFactory,
+ float qScale,
+ int32_t qOffset,
+ bool biasEnabled)
+{
+ armnn::TensorInfo inputTensorInfo({ 1, 5, 5, 2}, armnn::GetDataType<T>());
+ auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
+ QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(), {
+ 0, 25,
+ 1, 26,
+ 2, 27,
+ 3, 28,
+ 4, 29,
+
+ 5, 30,
+ 6, 31,
+ 7, 32,
+ 8, 33,
+ 9, 34,
+
+ 10, 35,
+ 11, 36,
+ 12, 37,
+ 13, 38,
+ 14, 39,
+
+ 15, 40,
+ 16, 41,
+ 17, 42,
+ 18, 43,
+ 19, 44,
+
+ 20, 45,
+ 21, 46,
+ 22, 47,
+ 23, 48,
+ 24, 49
+ })));
+
+ armnn::TensorInfo kernelTensorInfo({ 1, 4, 4, 2}, armnn::GetDataType<T>());
+ auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
+ QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(), {
+ 32, 16,
+ 31, 15,
+ 30, 14,
+ 29, 13,
+
+ 28, 12,
+ 27, 11,
+ 26, 10,
+ 25, 9,
+
+ 24, 8,
+ 23, 7,
+ 22, 6,
+ 21, 5,
+
+ 20, 4,
+ 19, 3,
+ 18, 2,
+ 17, 1
+ })));
+
+ armnn::TensorInfo outputTensorInfo({ 1, 5, 5, 2}, armnn::GetDataType<T>());
+ boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
+ QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(), {
+ 1062, 1550,
+ 1580, 2284,
+ 1850, 2362,
+ 1530, 1955,
+ 1117, 1428,
+
+ 2140, 2910,
+ 3108, 4206,
+ 3500, 4342,
+ 2842, 3528,
+ 2042, 2536,
+
+ 3580, 3390,
+ 5068, 4886,
+ 5460, 5022,
+ 4342, 4068,
+ 3062, 2916,
+
+ 3618, 3566,
+ 5072, 5056,
+ 5390, 5182,
+ 4248, 4133,
+ 2971, 2922,
+
+ 3074, 3100,
+ 4282, 4352,
+ 4510, 4452,
+ 3533, 3517,
+ 2457, 2465
+ })));
+
+ return DepthwiseConvolution2dNhwcTestImpl<T>(workloadFactory,
+ input,
+ kernel,
+ GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
+ expectedOutput,
+ qScale,
+ qOffset,
+ 1, // Padding left.
+ 1, // Padding top.
+ 2, // Padding right.
+ 2, // Padding bottom.
+ 1, // strideX
+ 1); // strideY
+}
+
LayerTestResult<float, 4>
Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory)
{
@@ -510,6 +623,12 @@ LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& wo
return DepthwiseConvolution2dTestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled);
}
+LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(armnn::IWorkloadFactory& workloadFactory,
+ bool biasEnabled)
+{
+ return DepthwiseConvolution2dNhwcTestCommon<float>(workloadFactory, 0.0f, 0, biasEnabled);
+}
+
LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory,
bool biasEnabled)
{
diff --git a/src/backends/test/LayerTests.hpp b/src/backends/test/LayerTests.hpp
index f5abd985c8..9f8cd3ff25 100644
--- a/src/backends/test/LayerTests.hpp
+++ b/src/backends/test/LayerTests.hpp
@@ -68,6 +68,9 @@ LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& work
LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled);
+LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(armnn::IWorkloadFactory& workloadFactory,
+ bool biasEnabled);
+
LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory,
bool biasEnabled);