diff options
15 files changed, 344 insertions, 49 deletions
diff --git a/Android.bp b/Android.bp index b02d1048b3..f426f2fbc2 100644 --- a/Android.bp +++ b/Android.bp @@ -587,9 +587,9 @@ cc_library_static { "src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp", "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp", "src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp", - "src/dynamic_fusion/sketch/OperatorAttributes.cpp", "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp", "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp", "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp", diff --git a/arm_compute/dynamic_fusion/sketch/OperatorAttributes.h b/arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h index 22c6772926..58102d8d88 100644 --- a/arm_compute/dynamic_fusion/sketch/OperatorAttributes.h +++ b/arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_OPERATORATTRIBUTES -#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_OPERATORATTRIBUTES +#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_CONV2DATTRIBUTES +#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_CONV2DATTRIBUTES #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Types.h" @@ -62,4 +62,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_OPERATORATTRIBUTES */ +#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_CONV2DATTRIBUTES */ diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h index 4ba237e3be..42c63df87f 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h @@ -25,7 +25,7 @@ #define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCONV2D #include "arm_compute/core/Error.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" namespace arm_compute { diff --git a/filelist.json b/filelist.json index 8be25712b7..ce45be59af 100644 --- a/filelist.json +++ b/filelist.json @@ -2205,12 +2205,12 @@ "src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp", "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp", "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp", "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp", "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp", - "src/dynamic_fusion/sketch/OperatorAttributes.cpp", "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp", "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp", "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp", diff --git a/src/dynamic_fusion/sketch/OperatorAttributes.cpp b/src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp index 205ce687a3..97e74f742d 100644 --- a/src/dynamic_fusion/sketch/OperatorAttributes.cpp +++ b/src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" namespace arm_compute { diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp index 1fbcb41028..c8e682f34a 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" #include "src/core/CL/CLValidate.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h" diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp index 9b006b13ce..736ce9bf5b 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp @@ -24,7 +24,6 @@ #include "ClComponentElementwiseBinary.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" #include "src/core/CL/CLValidate.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h" diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h index d296a361e3..b00d069389 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h @@ -25,10 +25,10 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_INTERNAL_GPUELEMENTWISEBINARYCOMMON #include "arm_compute/core/Error.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" namespace arm_compute { +class ITensorInfo; namespace experimental { namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h index 77f83c9e87..8988d3ca1c 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D #include "arm_compute/core/experimental/Types.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h index e69150f3e7..8cca954efe 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY #include "arm_compute/core/experimental/Types.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" diff --git a/tests/Utils.h b/tests/Utils.h index e58b8f7f86..7b831468d3 100644 --- a/tests/Utils.h +++ b/tests/Utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -48,7 +48,7 @@ #include <type_traits> #include <vector> -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" #include "arm_compute/runtime/RuntimeContext.h" diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp index effd8bfeee..a70f512f9f 100644 --- a/tests/validation/dynamic_fusion/gpu/Integration.cpp +++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp @@ -25,8 +25,8 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h" diff --git a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp index bfb9735599..45a2270bb3 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,14 +39,18 @@ namespace test { namespace validation { +namespace +{ +RelativeTolerance<float> tolerance_f32(0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr float abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/ +constexpr float tolerance_num = 0.02f; /**< Tolerance number */ +} // namespace + TEST_SUITE(CL) TEST_SUITE(DYNAMIC_FUSION) TEST_SUITE(CONV2D) -RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ -constexpr float tolerance_num = 0.02f; /**< Tolerance number */ - template <typename T> using DynamicFusionGpuConv2dFixture = DynamicFusionGpuConv2dValidationFixture<CLTensor, CLAccessor, GpuConv2d, T>; TEST_SUITE(FP32) @@ -71,6 +75,151 @@ FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuConv2dFixture<half>, framework: } TEST_SUITE_END() // FP16 +// Tests for specific conv2d methods +TEST_SUITE(DIRECT_CONV2D) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid: Mismatching data type input/weights + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid: Mismatching input feature maps + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid weights dimensions + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Unsupported biases size + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Unsupported biases dimensions + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout: NCHW + TensorInfo(TensorShape(2U, 32U, 16U), 1, DataType::QASYMM8, DataLayout::NHWC), // Unsupported data type: quantized + TensorInfo(TensorShape(2U, 32U, 16U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Arbitrary weight sizes for NHWC are supported + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Non-rectangular weights dimensions for NHWC are supported + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Strides > 2 for any kernel sizes for NHWC are supported + }), + framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F16, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U, 3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(2U, 1U, 1U, 4U), 1, DataType::QASYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 1U, 1U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 13U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 5U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U, 2U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(25U), 1, DataType::F32, DataLayout::NCHW), + TensorInfo(TensorShape(4U), 1, DataType::QASYMM8, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), + })), + framework::dataset::make("Conv2dAttributes", { + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({1, 1}).pad({0, 0, 0, 0}), + Conv2dAttributes().stride({3, 3}).pad({0, 0, 0, 0}), + })), + framework::dataset::make("Expected", { false, false, false, false, false, false, false, true, true, true, true })), + input_info, weights_info, biases_info, conv2d_attrs, expected) +{ + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); + const TensorInfo sketch_weights_info = sketch.create_tensor_info(weights_info); + const TensorInfo sketch_biases_info = sketch.create_tensor_info(biases_info); + bool is_valid = bool(GpuConv2d::validate_op(sketch, &sketch_input_info, &sketch_weights_info, &sketch_biases_info, conv2d_attrs)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +template <typename T> +using DynamicFusionGpuDirectConv2dFixture = DynamicFusionDirectConv2dValidationFixture<CLTensor, CLAccessor, GpuConv2d, T>; + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDirectConv2dFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 17, 3, 1, 19 })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDirectConv2dFixture<half>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDirectConv2dFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U), + TensorShape(19U, 5U, 16U, 4U), + TensorShape(13U, 5U, 17U, 2U), + TensorShape(32U, 37U, 13U) } ), + framework::dataset::make("StrideX", { 1, 3, 1, 1 })), + framework::dataset::make("StrideY", { 1, 3, 2, 1 })), + framework::dataset::make("PadX", { 1, 3, 0, 4 })), + framework::dataset::make("PadY", { 1, 3, 0, 4 })), + framework::dataset::make("KernelSize", { 3, 8, 1, 9 })), + framework::dataset::make("NumKernels", { 17, 3, 1, 19 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32, 0.0, abs_tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuDirectConv2dFixture<float>, framework::DatasetMode::NIGHTLY, + combine(combine(combine(zip(zip(zip(zip(zip( + framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ), + framework::dataset::make("StrideX", { 1 })), + framework::dataset::make("StrideY", { 1 })), + framework::dataset::make("PadX", { 1 })), + framework::dataset::make("PadY", { 1 })), + framework::dataset::make("KernelSize", { 9 })), + framework::dataset::make("NumKernels", { 3 })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC))) +{ + validate(CLAccessor(_target), _reference, tolerance_f32, 0.0, abs_tolerance_f32); +} +// clang-format on +// *INDENT-ON* + +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // DIRECT_CONV2D TEST_SUITE_END() // CONV2D TEST_SUITE_END() // DYNAMIC_FUSION TEST_SUITE_END() // CL diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h index 488d449782..e0aecf5ed4 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h @@ -28,8 +28,9 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" @@ -49,6 +50,36 @@ namespace test { namespace validation { +namespace +{ +template <typename U> +void fill(U &&tensor, int i) +{ + switch(tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } +} + +} // namespace + +/** General Conv2d fixture + * Adapted from tests/validation/fixtures/ConvolutionLayerFixture.h + * TODO: Parameterize to be fully backend agnostic: COMPMID-5760; remove Gpu from name + */ template <typename TensorType, typename AccessorType, typename FunctionType, typename T> class DynamicFusionGpuConv2dValidationGenericFixture : public framework::Fixture { @@ -74,28 +105,6 @@ public: } protected: - template <typename U> - void fill(U &&tensor, int i) - { - switch(tensor.data_type()) - { - case DataType::F16: - { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; - library->fill(tensor, distribution, i); - break; - } - case DataType::F32: - { - std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); - library->fill(tensor, distribution, i); - break; - } - default: - library->fill_tensor_uniform(tensor, i); - } - } - // Given input is in nchw format TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, Conv2dAttributes conv2d_attr) { @@ -201,6 +210,145 @@ public: data_type, data_layout, quantization_info, quantization_info); } }; + +/** Specific Conv2d method: Direct Conv2d fixture + * Adapted from tests/validation/fixtures/DirectConvolutionLayerFixture.h + * TODO: Parameterize to be fully backend agnostic: COMPMID-5760 + */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionDirectConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type; + + template <typename...> + void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, + DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout + + TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels); + const TensorShape bias_shape(num_kernels); + const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR); + const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, { 1U, 1U } /* dilation */); + + TensorInfo input_info = TensorInfo(input_shape, 1, data_type); + TensorInfo weights_info = TensorInfo(weights_shape, 1, data_type); + + const TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(input_info, weights_info, info); + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr, data_type, bias_data_type, quantization_info, data_layout); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info); + } + +protected: + TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const Conv2dAttributes &conv2d_attr, + DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info, const DataLayout &data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); + ARM_COMPUTE_UNUSED(quantization_info); + // Dataset shapes are in NCHW layout + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Create sketch tensors + auto input_info = sketch.create_tensor_info(TensorInfo(input_shape, 1, data_type, data_layout)); + auto weight_info = sketch.create_tensor_info(TensorInfo(weights_shape, 1, data_type, data_layout)); + auto bias_info = sketch.create_tensor_info(TensorInfo(bias_shape, 1, bias_data_type, data_layout)); + auto dst_info = sketch.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, conv2d_attr); + GpuOutput::create_op(sketch, ans_info, &dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + for(auto &data : runtime.get_auxiliary_tensors()) + { + auto tensor = data.first; + const auto aux_mem_req = data.second; + tensor->allocator()->init(*data.first->info(), aux_mem_req.alignment); + tensor->allocator()->allocate(); + } + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(input_info); + t_weight.allocator()->init(weight_info); + t_bias.allocator()->init(bias_info); + t_dst.allocator()->init(dst_info); + + ARM_COMPUTE_ASSERT(t_input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_weight.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable()); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!t_input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_weight.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable()); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({ &t_input, &t_weight, &t_bias, &t_dst }); + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info) + { + // Create reference + SimpleTensor<T> src{ input_shape, data_type, 1, quantization_info }; + SimpleTensor<T> weights{ weights_shape, data_type, 1, quantization_info }; + SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, quantization_info }; + + // Fill reference + fill(src, 0); + fill(weights, 1); + fill(bias, 2); + + SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info); + return dst; + } + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionDirectConv2dValidationFixture : public DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + template <typename...> + void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, + DataLayout data_layout) + { + DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, + QuantizationInfo(), + data_layout); + } +}; + } // namespace validation } // namespace test } // namespace arm_compute diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index 2978a238e5..61f0eb9cec 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -38,9 +38,9 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/experimental/IPostOp.h" #include "arm_compute/core/experimental/PostOps.h" -#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h" |