From 0841ca085301e8ddbc9627b2be55758b66437c15 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 1 Feb 2021 14:37:02 +0000 Subject: Fix OpenCL direct convolution - The ARM DOT macro was using the wrong variables for performing the dot product - K0 could be a non-power-of-2 value when IFM was not a multiple of 16 - Refactor the test for direct convolution NHWC Resolves COMPMID-4135, COMPMID-4155 Change-Id: I3a2dc89ef613ae20245cfc28e76ea36c55eaf81d Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4962 Comments-Addressed: Arm Jenkins Reviewed-by: TeresaARM Tested-by: Arm Jenkins --- tests/validation/CL/DirectConvolutionLayer.cpp | 232 +++++++++++++++++++------ 1 file changed, 177 insertions(+), 55 deletions(-) (limited to 'tests/validation') diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp index ae2f22dd1e..3a6cacc0ba 100644 --- a/tests/validation/CL/DirectConvolutionLayer.cpp +++ b/tests/validation/CL/DirectConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -161,14 +161,178 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info)); ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); } -// clang-format on -// *INDENT-ON* template using CLDirectConvolutionLayerFixture = DirectConvolutionValidationFixture; template using CLDirectConvolutionValidationWithTensorShapesFixture = DirectConvolutionValidationWithTensorShapesFixture; +template +using CLDirectConvolutionLayerQuantizedFixture = DirectConvolutionValidationQuantizedFixture; +template +using CLDirectConvolutionValidationWithTensorShapesQuantizedFixture = DirectConvolutionValidationWithTensorShapesQuantizedFixture; + +TEST_SUITE(NHWC) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), + TensorShape(9U, 5U, 6U, 4U), + TensorShape(3U, 5U, 7U, 2U), + TensorShape(32U, 37U, 3U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 7, 3, 1, 3 })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num); +} 
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), + TensorShape(9U, 5U, 6U, 4U), + TensorShape(3U, 5U, 7U, 2U), + TensorShape(32U, 37U, 3U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 7, 3, 1, 3 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(zip(zip(zip(zip(zip(zip( + 
framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_fp32); +} + +TEST_SUITE_END() // FP32 + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), + TensorShape(9U, 5U, 6U, 4U), + TensorShape(3U, 5U, 7U, 2U), + TensorShape(32U, 37U, 3U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 7, 3, 1, 3 })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", QuantizationInfo(1.1f / 255, 10))), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( + 
framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255, 10))), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8 +// +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U), + TensorShape(9U, 5U, 6U, 4U), + TensorShape(3U, 5U, 7U, 2U), + TensorShape(32U, 37U, 3U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 7, 3, 1, 3 })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255, 10))), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, + 
combine(combine(combine(combine(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255, 10))), + framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized +TEST_SUITE_END() // NHWC + +// clang-format on +// *INDENT-ON* +TEST_SUITE(NCHW) TEST_SUITE(Float) TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType", DataType::F16)), @@ -185,59 +349,22 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture, framewor // Validate output validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_9x9, framework::dataset::make("DataType", - DataType::F16)), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) -{ - validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num); -} -FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit_9x9, framework::dataset::make("DataType", - DataType::F16)), - ActivationFunctionsDataset), - 
framework::dataset::make("DataLayout", { DataLayout::NHWC }))) -{ - validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num); -} TEST_SUITE_END() // FP16 TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType", DataType::F32)), ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { validate(CLAccessor(_target), _reference, tolerance_fp32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly, framework::dataset::make("DataType", DataType::F32)), ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) -{ - validate(CLAccessor(_target), _reference, tolerance_fp32); -} -FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_9x9, framework::dataset::make("DataType", - DataType::F32)), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW }))) { validate(CLAccessor(_target), _reference, tolerance_fp32); } -FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit_9x9, framework::dataset::make("DataType", - DataType::F32)), - ActivationFunctionsDataset), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) -{ - validate(CLAccessor(_target), _reference, tolerance_fp32); -} - -FIXTURE_DATA_TEST_CASE(RunLargeUsecase, CLDirectConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_usecase, framework::dataset::make("DataType", - DataType::F32)), - 
framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_fp32, 0.f, tolerance_fp32_abs); -} TEST_SUITE_END() // FP32 TEST_SUITE(FP32_CustomDataset) @@ -251,11 +378,6 @@ FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesFixture TEST_SUITE_END() // FP32_CustomDataset TEST_SUITE_END() // Float -template -using CLDirectConvolutionLayerQuantizedFixture = DirectConvolutionValidationQuantizedFixture; -template -using CLDirectConvolutionValidationWithTensorShapesQuantizedFixture = DirectConvolutionValidationWithTensorShapesQuantizedFixture; - const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { ActivationLayerInfo(), @@ -269,7 +391,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture