diff options
author | Nikhil Raj <nikhil.raj@arm.com> | 2018-10-18 14:27:50 +0100 |
---|---|---|
committer | Matthew Bentham <matthew.bentham@arm.com> | 2018-10-22 16:57:54 +0100 |
commit | d134093a271b60e248942af9757e8236e8f41ac1 (patch) | |
tree | 7b15baae3cd028dc4cb0f0bb5302ecfbc9db8225 /src/backends/cl | |
parent | 33f0ae0d293f5048089f2a04985a8a8bfa1d75a6 (diff) | |
download | armnn-d134093a271b60e248942af9757e8236e8f41ac1.tar.gz |
IVGCVSW-2023 CL and Neon implementation of BatchNorm with NHWC
Change-Id: I962e986607e5d045cd97b9eaeaea2f5ae624db35
Diffstat (limited to 'src/backends/cl')
-rw-r--r-- | src/backends/cl/test/ClCreateWorkloadTests.cpp | 38 | ||||
-rwxr-xr-x | src/backends/cl/test/ClLayerTests.cpp | 1 | ||||
-rw-r--r-- | src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp | 24 |
3 files changed, 49 insertions, 14 deletions
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp index 756b4a603b..b5fc031461 100644 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp @@ -144,31 +144,53 @@ BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest) } template <typename BatchNormalizationWorkloadType, armnn::DataType DataType> -static void ClCreateBatchNormalizationWorkloadTest() +static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout) { Graph graph; ClWorkloadFactory factory; auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType> - (factory, graph); + (factory, graph, dataLayout); // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest). BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData(); auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); - BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 1, 1})); - BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3, 1, 1})); + switch (dataLayout) + { + case DataLayout::NHWC: + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 })); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 })); + break; + default: // NCHW + BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 })); + BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 })); + } +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload) +{ + ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, + armnn::DataType::Float32>(DataLayout::NCHW); +} + +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload) +{ + ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, + armnn::DataType::Float16>(DataLayout::NCHW); } -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatWorkload) +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload) { - ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, armnn::DataType::Float32>(); + ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, + armnn::DataType::Float32>(DataLayout::NHWC); } -BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16Workload) +BOOST_AUTO_TEST_CASE(CreateBatchNormalizationNhwcFloat16NhwcWorkload) { - ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, armnn::DataType::Float16>(); + ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload, + armnn::DataType::Float16>(DataLayout::NHWC); } BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload) diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp index 3b1603c13c..a4f824a47e 100755 --- a/src/backends/cl/test/ClLayerTests.cpp +++ b/src/backends/cl/test/ClLayerTests.cpp @@ -181,6 +181,7 @@ ARMNN_AUTO_TEST_CASE(MultiplicationBroadcast1DVectorUint8, MultiplicationBroadca // Batch Norm ARMNN_AUTO_TEST_CASE(BatchNorm, BatchNormTest) +ARMNN_AUTO_TEST_CASE(BatchNormNhwc, BatchNormNhwcTest) // L2 Normalization ARMNN_AUTO_TEST_CASE(L2Normalization1d, L2Normalization1dTest) diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp index 5bff7a63c9..24be7cddca 100644 --- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp @@ -23,12 +23,20 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, const TensorInfo& gamma, const BatchNormalizationDescriptor &desc) { - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); - const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); - const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); - const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); - const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); + const DataLayout dataLayout = desc.m_DataLayout.GetDataLayout(); + + const arm_compute::TensorInfo aclInputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(input, dataLayout); + const arm_compute::TensorInfo aclOutputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(output, dataLayout); + const arm_compute::TensorInfo aclMeanInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(mean, dataLayout); + const arm_compute::TensorInfo aclVarInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(var, dataLayout); + const arm_compute::TensorInfo aclBetaInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(beta, dataLayout); + const arm_compute::TensorInfo aclGammaInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(gamma, dataLayout); return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, @@ -60,6 +68,10 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout.GetDataLayout()); + input.info()->set_data_layout(aclDataLayout); + output.info()->set_data_layout(aclDataLayout); + m_Layer.configure(&input, &output, m_Mean.get(), |