diff options
Diffstat (limited to 'tests/validation/fixtures/dynamic_fusion/gpu')
5 files changed, 406 insertions, 250 deletions
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h index 6498a06e03..ca4de11a15 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,14 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE -#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" - #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" #include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" @@ -36,13 +35,11 @@ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" #include "tests/CL/CLAccessor.h" - #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/framework/Macros.h" - -#include "tests/validation/Validation.h" #include "tests/validation/reference/DepthwiseConvolutionLayer.h" +#include "tests/validation/Validation.h" using namespace arm_compute::experimental::dynamic_fusion; @@ -56,22 +53,30 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DynamicFusionGpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture { public: - using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value - || std::is_same<typename std::decay<T>::type, int8_t>::value, - int32_t, T >::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T - - void setup(TensorShape input_shape, Size2D kernel_size, const PadStrideInfo &pad_stride, const Size2D &dilation, - const unsigned int depth_multiplier, const DataType data_type, const DataLayout data_layout) + using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value || + std::is_same<typename std::decay<T>::type, int8_t>::value, + int32_t, + T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T + + void setup(TensorShape input_shape, + Size2D kernel_size, + const PadStrideInfo &pad_stride, + const Size2D &dilation, + const unsigned int depth_multiplier, + const DataType data_type, + const DataLayout data_layout) { - ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout + ARM_COMPUTE_ERROR_ON(data_layout != + DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout DepthwiseConv2dAttributes dwc_conv2d_attr; - const Padding2D padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), pad_stride.pad_top(), pad_stride.pad_bottom()); + const Padding2D padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), pad_stride.pad_top(), + pad_stride.pad_bottom()); dwc_conv2d_attr.pad(padding_2d) - .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second)) - .dilation(dilation) - .depth_multiplier(depth_multiplier) - .dimension_rounding_type(pad_stride.round()); + .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second)) + .dilation(dilation) + .depth_multiplier(depth_multiplier) + .dimension_rounding_type(pad_stride.round()); // Calculate Output and Weight Shapes TensorShape weights_shape = TensorShape(kernel_size.width, kernel_size.height); @@ -79,8 +84,9 @@ public: const TensorInfo in_info(input_shape, 1, data_type); const TensorInfo we_info(weights_shape, 1, data_type); - const ConvolutionInfo info{ pad_stride, depth_multiplier, ActivationLayerInfo(), dilation }; - const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info); + const ConvolutionInfo info{pad_stride, depth_multiplier, ActivationLayerInfo(), dilation}; + const TensorShape output_shape = + misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info); weights_shape.set(2, output_shape.z()); const TensorShape bias_shape = TensorShape(weights_shape[2]); @@ -95,11 +101,11 @@ protected: template <typename U> void fill(U &&tensor, int i) { - switch(tensor.data_type()) + switch (tensor.data_type()) { case DataType::F16: { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; library->fill(tensor, distribution, i); break; } @@ -115,7 +121,10 @@ protected: } // Given input is in nchw format - TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, const DepthwiseConv2dAttributes dwc_conv2d_attr) + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + const DepthwiseConv2dAttributes dwc_conv2d_attr) { ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); @@ -125,24 +134,24 @@ protected: // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; // Create sketch tensors - TensorInfo input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); - TensorInfo weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); - TensorInfo bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); - TensorInfo dst_info = context.create_tensor_info(); + ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + ITensorInfo *dst_info = context.create_tensor_info(); - ITensorInfo *ans_info = FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, dwc_conv2d_attr); - GpuOutput::create_op(sketch, ans_info, &dst_info); + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, dwc_conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); // Configure runtime ClWorkloadRuntime runtime; runtime.configure(sketch); // (Important) Allocate auxiliary tensor memory if there are any - for(auto &data : runtime.get_auxiliary_tensors()) + for (auto &data : runtime.get_auxiliary_tensors()) { CLTensor *tensor = std::get<0>(data); TensorInfo info = std::get<1>(data); @@ -158,10 +167,10 @@ protected: TensorType t_dst{}; // Initialize user tensors - t_input.allocator()->init(input_info); - t_weight.allocator()->init(weight_info); - t_bias.allocator()->init(bias_info); - t_dst.allocator()->init(dst_info); + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); // Allocate and fill user tensors t_input.allocator()->allocate(); @@ -174,17 +183,20 @@ protected: fill(AccessorType(t_bias), 2); // Run runtime - runtime.run({ &t_input, &t_weight, &t_bias, &t_dst }); + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); return t_dst; } - SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, - const TensorShape &output_shape, DepthwiseConv2dAttributes dwc_conv2d_attr) + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + DepthwiseConv2dAttributes dwc_conv2d_attr) { // Create reference - SimpleTensor<T> src{ input_shape, _data_type, 1 }; - SimpleTensor<T> weight{ weights_shape, _data_type, 1 }; - SimpleTensor<TBias> bias{ bias_shape, _data_type, 1 }; + SimpleTensor<T> src{input_shape, _data_type, 1}; + SimpleTensor<T> weight{weights_shape, _data_type, 1}; + SimpleTensor<TBias> bias{bias_shape, _data_type, 1}; fill(src, 0); fill(weight, 1); @@ -195,10 +207,13 @@ protected: auto bias_nchw = bias; auto output_shape_nchw = output_shape; - PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(), dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right, dwc_conv2d_attr.pad().top, - dwc_conv2d_attr.pad().bottom, + PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(), + dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right, + dwc_conv2d_attr.pad().top, dwc_conv2d_attr.pad().bottom, DimensionRoundingType{}); - auto dst_nchw = reference::depthwise_convolution(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride, dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation()); + auto dst_nchw = + reference::depthwise_convolution(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride, + dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation()); return dst_nchw; } @@ -209,16 +224,23 @@ protected: }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionGpuDepthwiseConv2dValidationFixture : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionGpuDepthwiseConv2dValidationFixture + : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - void setup(TensorShape input_shape, Size2D kernel_size, const PadStrideInfo &info, const Size2D &dilation, const unsigned int depth_multiplier, DataType data_type, DataLayout data_layout) + void setup(TensorShape input_shape, + Size2D kernel_size, + const PadStrideInfo &info, + const Size2D &dilation, + const unsigned int depth_multiplier, + DataType data_type, + DataLayout data_layout) { - DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, kernel_size, info, dilation, - depth_multiplier, data_type, data_layout); + DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, kernel_size, info, dilation, depth_multiplier, data_type, data_layout); } }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h index e30a564930..1f4e223b93 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,13 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE -#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" - #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" #include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" @@ -38,9 +37,9 @@ #include "tests/CL/CLAccessor.h" #include "tests/framework/Fixture.h" #include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" #include "tests/validation/reference/ConvolutionLayer.h" #include "tests/validation/reference/Permute.h" +#include "tests/validation/Validation.h" using namespace arm_compute::experimental::dynamic_fusion; @@ -55,11 +54,11 @@ namespace template <typename U> void fill(U &&tensor, int i) { - switch(tensor.data_type()) + switch (tensor.data_type()) { case DataType::F16: { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; library->fill(tensor, distribution, i); break; } @@ -84,12 +83,21 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DynamicFusionGpuConv2dValidationGenericFixture : public framework::Fixture { public: - using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value - || std::is_same<typename std::decay<T>::type, int8_t>::value, - int32_t, T >::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T - - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, const PadStrideInfo &info, const Size2D &dilation, DataType data_type, - DataLayout data_layout, QuantizationInfo quantization_info, QuantizationInfo weight_quantization_info) + using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value || + std::is_same<typename std::decay<T>::type, int8_t>::value, + int32_t, + T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T + + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + const PadStrideInfo &info, + const Size2D &dilation, + DataType data_type, + DataLayout data_layout, + QuantizationInfo quantization_info, + QuantizationInfo weight_quantization_info) { ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, dilation); @@ -100,12 +108,15 @@ public: _weight_quantization_info = weight_quantization_info; _bias_data_type = _is_quantized ? DataType::S32 : data_type; _target = compute_target(input_shape, weights_shape, bias_shape, conv2d_attr); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr); } protected: // Given input is in nchw format - TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, Conv2dAttributes conv2d_attr) + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + Conv2dAttributes conv2d_attr) { ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); permute(input_shape, PermutationVector(2U, 0U, 1U)); @@ -114,23 +125,23 @@ protected: // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; // Create sketch tensors - TensorInfo input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); - TensorInfo weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); - TensorInfo bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); - TensorInfo dst_info = context.create_tensor_info(); + ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + ITensorInfo *dst_info = context.create_tensor_info(); - ITensorInfo *ans_info = FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, conv2d_attr); - GpuOutput::create_op(sketch, ans_info, &dst_info); + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); // Configure runtime ClWorkloadRuntime runtime; runtime.configure(sketch); // (Important) Allocate auxiliary tensor memory if there are any - for(auto &data : runtime.get_auxiliary_tensors()) + for (auto &data : runtime.get_auxiliary_tensors()) { CLTensor *tensor = std::get<0>(data); TensorInfo info = std::get<1>(data); @@ -145,10 +156,10 @@ protected: TensorType t_dst{}; // Initialize user tensors - t_input.allocator()->init(input_info); - t_weight.allocator()->init(weight_info); - t_bias.allocator()->init(bias_info); - t_dst.allocator()->init(dst_info); + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); // Allocate and fill user tensors t_input.allocator()->allocate(); @@ -161,17 +172,20 @@ protected: fill(AccessorType(t_bias), 2); // Run runtime - runtime.run({ &t_input, &t_weight, &t_bias, &t_dst }); + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); return t_dst; } - SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, - const TensorShape &output_shape, Conv2dAttributes conv2d_attr) + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + Conv2dAttributes conv2d_attr) { // Create reference - SimpleTensor<T> src{ input_shape, _data_type, 1, _quantization_info }; - SimpleTensor<T> weight{ weights_shape, _data_type, 1, _weight_quantization_info }; - SimpleTensor<TBias> bias{ bias_shape, _data_type, 1, _quantization_info }; + SimpleTensor<T> src{input_shape, _data_type, 1, _quantization_info}; + SimpleTensor<T> weight{weights_shape, _data_type, 1, _weight_quantization_info}; + SimpleTensor<TBias> bias{bias_shape, _data_type, 1, _quantization_info}; fill(src, 0); fill(weight, 1); @@ -182,9 +196,11 @@ protected: auto bias_nchw = bias; auto output_shape_nchw = output_shape; - PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left, conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom, + PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left, + conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom, DimensionRoundingType{}); - auto dst_nchw = reference::convolution_layer(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride, conv2d_attr.dilation()); + auto dst_nchw = reference::convolution_layer(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, + legacy_pad_stride, conv2d_attr.dilation()); return dst_nchw; } @@ -199,14 +215,23 @@ protected: }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionGpuConv2dValidationFixture : public DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionGpuConv2dValidationFixture + : public DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape output_shape, TensorShape bias_shape, - const PadStrideInfo &info, const Size2D &dialation, DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info) + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape output_shape, + TensorShape bias_shape, + const PadStrideInfo &info, + const Size2D &dialation, + DataType data_type, + DataLayout data_layout, + QuantizationInfo quantization_info) { - DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, output_shape, bias_shape, info, dialation, - data_type, data_layout, quantization_info, quantization_info); + DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, weights_shape, output_shape, bias_shape, info, dialation, data_type, data_layout, + quantization_info, quantization_info); } }; @@ -218,10 +243,19 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DynamicFusionDirectConv2dValidationGenericFixture : public framework::Fixture { public: - using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type; - - void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, - DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout) + using TBias = + typename std::conditional<std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T>::type; + + void setup(TensorShape input_shape, + int stride_x, + int stride_y, + int pad_x, + int pad_y, + unsigned int kernel_size, + unsigned int num_kernels, + DataType data_type, + QuantizationInfo quantization_info, + DataLayout data_layout) { ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout @@ -230,20 +264,30 @@ public: const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR); const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; - const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, { 1U, 1U } /* dilation */); + const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, {1U, 1U} /* dilation */); TensorInfo input_info = TensorInfo(input_shape, 1, data_type); TensorInfo weights_info = TensorInfo(weights_shape, 1, data_type); - const TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(input_info, weights_info, info); + const TensorShape output_shape = + misc::shape_calculator::compute_deep_convolution_shape(input_info, weights_info, info); - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr, data_type, bias_data_type, quantization_info, data_layout); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, quantization_info); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr, data_type, + bias_data_type, quantization_info, data_layout); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, + bias_data_type, quantization_info); } protected: - TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const Conv2dAttributes &conv2d_attr, - DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info, const DataLayout &data_layout) + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const Conv2dAttributes &conv2d_attr, + DataType data_type, + DataType bias_data_type, + QuantizationInfo quantization_info, + const DataLayout &data_layout) { ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); ARM_COMPUTE_UNUSED(quantization_info); @@ -253,8 +297,8 @@ protected: permute(output_shape, PermutationVector(2U, 0U, 1U)); auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; // Create sketch tensors auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, data_layout)); @@ -262,14 +306,14 @@ protected: auto bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, bias_data_type, data_layout)); auto dst_info = context.create_tensor_info(); - ITensorInfo *ans_info = FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, conv2d_attr); - GpuOutput::create_op(sketch, ans_info, &dst_info); + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); // Configure runtime ClWorkloadRuntime runtime; runtime.configure(sketch); - for(auto &data : runtime.get_auxiliary_tensors()) + for (auto &data : runtime.get_auxiliary_tensors()) { CLTensor *tensor = std::get<0>(data); TensorInfo info = std::get<1>(data); @@ -284,10 +328,10 @@ protected: TensorType t_dst{}; // Initialize user tensors - t_input.allocator()->init(input_info); - t_weight.allocator()->init(weight_info); - t_bias.allocator()->init(bias_info); - t_dst.allocator()->init(dst_info); + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); ARM_COMPUTE_ASSERT(t_input.info()->is_resizable()); ARM_COMPUTE_ASSERT(t_weight.info()->is_resizable()); @@ -310,17 +354,23 @@ protected: fill(AccessorType(t_bias), 2); // Run runtime - runtime.run({ &t_input, &t_weight, &t_bias, &t_dst }); + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); return t_dst; } - SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, - DataType data_type, DataType bias_data_type, QuantizationInfo quantization_info) + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + const PadStrideInfo &info, + DataType data_type, + DataType bias_data_type, + QuantizationInfo quantization_info) { // Create reference - SimpleTensor<T> src{ input_shape, data_type, 1, quantization_info }; - SimpleTensor<T> weights{ weights_shape, data_type, 1, quantization_info }; - SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, quantization_info }; + SimpleTensor<T> src{input_shape, data_type, 1, quantization_info}; + SimpleTensor<T> weights{weights_shape, data_type, 1, quantization_info}; + SimpleTensor<TBias> bias{bias_shape, bias_data_type, 1, quantization_info}; // Fill reference fill(src, 0); @@ -335,19 +385,27 @@ protected: }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionDirectConv2dValidationFixture : public DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionDirectConv2dValidationFixture + : public DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, - DataLayout data_layout) + void setup(TensorShape input_shape, + int stride_x, + int stride_y, + int pad_x, + int pad_y, + unsigned int kernel_size, + unsigned int num_kernels, + DataType data_type, + DataLayout data_layout) { - DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, - QuantizationInfo(), - data_layout); + DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, QuantizationInfo(), + data_layout); } }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h index 567322f181..69bd0efbdc 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE -#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/TensorInfo.h" @@ -47,9 +47,15 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framework::Fixture { public: - void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops = false) + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops = false) { - _ref_op = ref_op; + _ref_op = ref_op; _is_inplace = is_inplace; _data_type = data_type; _fuse = fuse_two_ops; @@ -63,12 +69,12 @@ protected: template <typename U> void fill(U &&tensor, int i) { - if(is_data_type_float(tensor.data_type())) + if (is_data_type_float(tensor.data_type())) { - switch(_ref_op) + switch (_ref_op) { case ArithmeticOperation::DIV: - library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) }); + library->fill_tensor_uniform_ranged(tensor, i, {std::pair<float, float>(-0.001f, 0.001f)}); break; case ArithmeticOperation::POWER: library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f); @@ -77,12 +83,12 @@ protected: library->fill_tensor_uniform(tensor, i); } } - else if(tensor.data_type() == DataType::S32) + else if (tensor.data_type() == DataType::S32) { - switch(_ref_op) + switch (_ref_op) { case ArithmeticOperation::DIV: - library->fill_tensor_uniform_ranged(tensor, i, { std::pair<int32_t, int32_t>(-1U, 1U) }); + library->fill_tensor_uniform_ranged(tensor, i, {std::pair<int32_t, int32_t>(-1U, 1U)}); break; default: library->fill_tensor_uniform(tensor, i); @@ -98,27 +104,27 @@ protected: { // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; // Fuse first element wise binary Op - TensorInfo lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type)); - TensorInfo rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type)); - TensorInfo dst_info = context.create_tensor_info(); + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); - TensorInfo rhs_info_fuse; + ITensorInfo *rhs_info_fuse = nullptr; - ITensorInfo *ans_info = FunctionType::create_op(sketch, &lhs_info, &rhs_info); + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info); - if(_fuse) + if (_fuse) { rhs_info_fuse = context.create_tensor_info(TensorInfo(shape2, 1, _data_type)); - ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, &rhs_info_fuse); - GpuOutput::create_op(sketch, ans2_info, &dst_info); + ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, rhs_info_fuse); + GpuOutput::create_op(sketch, ans2_info, dst_info); } else { - GpuOutput::create_op(sketch, ans_info, &dst_info); + GpuOutput::create_op(sketch, ans_info, dst_info); } // Configure runtime @@ -126,7 +132,7 @@ protected: runtime.configure(sketch); // (Important) Allocate auxiliary tensor memory if there are any - for(auto &data : runtime.get_auxiliary_tensors()) + for (auto &data : runtime.get_auxiliary_tensors()) { CLTensor *tensor = std::get<0>(data); TensorInfo info = std::get<1>(data); @@ -142,12 +148,12 @@ protected: TensorType t_dst{}; // Initialize user tensors - t_lhs.allocator()->init(lhs_info); - t_rhs.allocator()->init(rhs_info); - t_dst.allocator()->init(dst_info); - if(_fuse) + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); + if (_fuse) { - t_rhs_fuse.allocator()->init(rhs_info_fuse); + t_rhs_fuse.allocator()->init(*rhs_info_fuse); } // Allocate and fill user tensors @@ -155,26 +161,26 @@ protected: t_lhs.allocator()->allocate(); t_rhs.allocator()->allocate(); t_dst.allocator()->allocate(); - if(_fuse) + if (_fuse) { t_rhs_fuse.allocator()->allocate(); } fill(AccessorType(t_lhs), 0); fill(AccessorType(t_rhs), 1); - if(_fuse) + if (_fuse) { fill(AccessorType(t_rhs_fuse), 2); } // Run runtime - if(_fuse) + if (_fuse) { - runtime.run({ &t_lhs, &t_rhs, &t_rhs_fuse, &t_dst }); + runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst}); } else { - runtime.run({ &t_lhs, &t_rhs, &t_dst }); + runtime.run({&t_lhs, &t_rhs, &t_dst}); } return t_dst; @@ -186,18 +192,18 @@ protected: const TensorShape out_shape_fuse = TensorShape::broadcast_shape(out_shape, shape1); // Create reference - SimpleTensor<T> ref_lhs{ shape0, _data_type, 1, QuantizationInfo() }; - SimpleTensor<T> ref_rhs{ shape1, _data_type, 1, QuantizationInfo() }; - SimpleTensor<T> ref_rhs_fuse{ shape2, _data_type, 1, QuantizationInfo() }; - SimpleTensor<T> ref_dst{ out_shape, _data_type, 1, QuantizationInfo() }; - SimpleTensor<T> ref_dst_fuse{ out_shape_fuse, _data_type, 1, QuantizationInfo() }; + SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_dst{out_shape, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_dst_fuse{out_shape_fuse, _data_type, 1, QuantizationInfo()}; // Fill reference fill(ref_lhs, 0); fill(ref_rhs, 1); reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP); - if(_fuse) + if (_fuse) { fill(ref_rhs_fuse, 2); reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP); @@ -206,46 +212,62 @@ protected: return *ret; } - ArithmeticOperation _ref_op{ ArithmeticOperation::ADD }; + ArithmeticOperation _ref_op{ArithmeticOperation::ADD}; TensorType _target{}; SimpleTensor<T> _reference{}; DataType _data_type{}; DataLayout _data_layout{}; - bool _is_inplace{ false }; - bool _fuse{ false }; + bool _is_inplace{false}; + bool _fuse{false}; }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace) { - DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape0, TensorShape(), data_type, is_inplace); + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape0, TensorShape(), data_type, is_inplace); } }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace) + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type, + bool is_inplace) { - DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, TensorShape(), data_type, is_inplace); + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape1, TensorShape(), data_type, is_inplace); } }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops) + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops) { - DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); } }; } // namespace validation } // namespace test } // namespace arm_compute -#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE */ +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h index c6ac4b91db..65a3363e24 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,7 +28,6 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" - #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" #include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" @@ -39,10 +38,10 @@ #include "tests/framework/Fixture.h" #include "tests/framework/Macros.h" #include "tests/validation/Helpers.h" -#include "tests/validation/Validation.h" #include "tests/validation/reference/GEMM.h" #include "tests/validation/reference/Permute.h" #include "tests/validation/reference/ReshapeLayer.h" +#include "tests/validation/Validation.h" using namespace arm_compute::experimental::dynamic_fusion; @@ -57,11 +56,11 @@ namespace template <typename U> void fill(U &&tensor, int i) { - switch(tensor.data_type()) + switch (tensor.data_type()) { case DataType::F16: { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; library->fill(tensor, distribution, i); break; } @@ -80,67 +79,83 @@ void fill(U &&tensor, int i) template <typename TensorType, typename AccessorType, typename FunctionType, typename T> class DynamicFusionGpuMatMulValidationGenericFixture : public framework::Fixture { - public: - void setup(TensorShape lhs_shape, TensorShape rhs_shape, TensorShape output_shape, bool transpose_a, bool transpose_b, - int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type) + void setup(TensorShape lhs_shape, + TensorShape rhs_shape, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) { //For brevity, the input shapes are assumed to be not-transposed for both a and b matrices. - if(transpose_a) + if (transpose_a) { permute(lhs_shape, PermutationVector(1U, 0U)); } - if(transpose_b) + if (transpose_b) { permute(rhs_shape, PermutationVector(1U, 0U)); } // Skip configurations unsupported by the device. _device_supports_export_to_cl_image = image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); - if(!_device_supports_export_to_cl_image && export_rhs_to_cl_image) + if (!_device_supports_export_to_cl_image && export_rhs_to_cl_image) { ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); framework::ARM_COMPUTE_PRINT_INFO(); return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs. } - _target = compute_target(lhs_shape, rhs_shape, transpose_a, transpose_b, M0, N0, K0, export_rhs_to_cl_image, data_type); + _target = compute_target(lhs_shape, rhs_shape, transpose_a, transpose_b, M0, N0, K0, export_rhs_to_cl_image, + data_type); _reference = compute_reference(lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, data_type); } protected: - TensorType compute_target(TensorShape &shape_a, TensorShape &shape_b, bool transpose_a, bool transpose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type) + TensorType compute_target(TensorShape &shape_a, + TensorShape &shape_b, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) { ARM_COMPUTE_UNUSED(export_rhs_to_cl_image); CLScheduler::get().default_reinit(); // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; // Create sketch tensors - TensorInfo lhs_info = context.create_tensor_info(TensorInfo(shape_a, 1, data_type)); - TensorInfo rhs_info = context.create_tensor_info(TensorInfo(shape_b, 1, data_type)); - TensorInfo dst_info = context.create_tensor_info(); + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape_a, 1, data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape_b, 1, data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); - MatMulAttributes matmul_attr {}; + MatMulAttributes matmul_attr{}; matmul_attr.adj_lhs(transpose_a); matmul_attr.adj_rhs(transpose_b); - GpuMatMulSettings matmul_settings {}; + GpuMatMulSettings matmul_settings{}; matmul_settings.m0(M0); matmul_settings.n0(N0); matmul_settings.k0(K0); - ITensorInfo *ans_info = FunctionType::create_op(sketch, &lhs_info, &rhs_info, matmul_attr, matmul_settings); - GpuOutput::create_op(sketch, ans_info, &dst_info); + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings); + GpuOutput::create_op(sketch, ans_info, dst_info); // Configure runtime ClWorkloadRuntime runtime; runtime.configure(sketch); - for(auto &data : runtime.get_auxiliary_tensors()) + for (auto &data : runtime.get_auxiliary_tensors()) { CLTensor *tensor = std::get<0>(data); TensorInfo info = std::get<1>(data); @@ -155,9 +170,9 @@ protected: TensorType t_dst{}; // Initialize user tensors - t_lhs.allocator()->init(lhs_info); - t_rhs.allocator()->init(rhs_info); - t_dst.allocator()->init(dst_info); + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); ARM_COMPUTE_ASSERT(t_lhs.info()->is_resizable()); ARM_COMPUTE_ASSERT(t_rhs.info()->is_resizable()); @@ -176,12 +191,17 @@ protected: fill(AccessorType(t_rhs), 1); // Run runtime - runtime.run({ &t_lhs, &t_rhs, &t_dst }); + runtime.run({&t_lhs, &t_rhs, &t_dst}); return t_dst; } - SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, DataType data_type) + SimpleTensor<T> compute_reference(const TensorShape &shape_a, + const TensorShape &shape_b, + const TensorShape &output_shape, + bool pretranspose_a, + bool pretranspose_b, + DataType data_type) { // We collapse dimensions > 3 onto dimension 3, i.e. 5D+ tensors will look like 4D // This is necessary unless we choose to extend gemm reference for 5D+ tensors @@ -190,9 +210,9 @@ protected: TensorShape shape_b_collapsed = shape_b.collapsed_from(Window::DimZ); // Create reference - SimpleTensor<T> a{ shape_a_collapsed, data_type, 1 }; - SimpleTensor<T> b{ shape_b_collapsed, data_type, 1 }; - SimpleTensor<T> c{ output_shape_collapsed, data_type, 1 }; + SimpleTensor<T> a{shape_a_collapsed, data_type, 1}; + SimpleTensor<T> b{shape_b_collapsed, data_type, 1}; + SimpleTensor<T> c{output_shape_collapsed, data_type, 1}; // Fill reference fill(a, 0); @@ -213,27 +233,27 @@ protected: b_transposed_shape.set(1, b.shape().x()); // Define transposed tensors - SimpleTensor<T> a_transposed{ a_transposed_shape, data_type }; - SimpleTensor<T> b_transposed{ b_transposed_shape, data_type }; + SimpleTensor<T> a_transposed{a_transposed_shape, data_type}; + SimpleTensor<T> b_transposed{b_transposed_shape, data_type}; //pretranspose a if necessary - if(pretranspose_a) + if (pretranspose_a) { a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U)); } // pretranspose b if necessary - if(pretranspose_b) + if (pretranspose_b) { b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U)); } // Use transposed tensors if boolean enabled else use original tensors - SimpleTensor<T> result = reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, 1.0f, 0.f); - + SimpleTensor<T> result = + reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, 1.0f, 0.f); // We reshape the gemm output back if the tensor is high dimensional - if(output_shape_collapsed != output_shape) + if (output_shape_collapsed != output_shape) { // std::cout << "called reshape: \n"; result = reference::reshape_layer(result, output_shape); @@ -244,20 +264,30 @@ protected: CLTensor _target{}; SimpleTensor<T> _reference{}; - bool _device_supports_export_to_cl_image{ false }; - bool _device_supports_mmul{ false }; + bool _device_supports_export_to_cl_image{false}; + bool _device_supports_mmul{false}; }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionGpuMatMulValidationFixture : public DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionGpuMatMulValidationFixture + : public DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { - public: - void setup(TensorShape lhs_shape, TensorShape rhs_shape, TensorShape output_shape, bool transpose_a, bool transpose_b, - int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type) +public: + void setup(TensorShape lhs_shape, + TensorShape rhs_shape, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) { ARM_COMPUTE_UNUSED(export_rhs_to_cl_image); - DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, M0, - N0, K0, false /* export_rhs_to_cl_image bias */, data_type); + DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, M0, N0, K0, + false /* export_rhs_to_cl_image bias */, data_type); } }; diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h index 34f2647741..dd3519b549 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,14 +28,13 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" - #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" #include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" -#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" -#include "src/dynamic_fusion/utils/Utils.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" +#include "src/dynamic_fusion/utils/Utils.h" #include "tests/CL/CLAccessor.h" #include "tests/framework/Fixture.h" #include "tests/validation/reference/PoolingLayer.h" @@ -54,19 +53,20 @@ class DynamicFusionGpuPool2dValidationGenericFixture : public framework::Fixture public: void setup(TensorShape input_shape, const Pool2dAttributes &pool_attr, DataType data_type, bool mixed_precision) { - _target = compute_target(input_shape, pool_attr, data_type, mixed_precision); - _reference = compute_reference(input_shape, convert_pool_attr_to_pool_info(pool_attr, mixed_precision), data_type); + _target = compute_target(input_shape, pool_attr, data_type, mixed_precision); + _reference = + compute_reference(input_shape, convert_pool_attr_to_pool_info(pool_attr, mixed_precision), data_type); } protected: template <typename U> void fill(U &&tensor, int i) { - switch(tensor.data_type()) + switch (tensor.data_type()) { case DataType::F16: { - arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; library->fill(tensor, distribution, i); break; } @@ -82,7 +82,10 @@ protected: } // Given input is in nchw format - TensorType compute_target(TensorShape input_shape, const Pool2dAttributes &pool_attr, const DataType data_type, bool mixed_precision) + TensorType compute_target(TensorShape input_shape, + const Pool2dAttributes &pool_attr, + const DataType data_type, + bool mixed_precision) { CLScheduler::get().default_reinit(); @@ -91,8 +94,8 @@ protected: // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto context = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &context }; + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; // Create sketch tensors auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, DataLayout::NHWC)); @@ -101,14 +104,14 @@ protected: // Create Pool2dSettings GpuPool2dSettings pool_settings = GpuPool2dSettings().mixed_precision(mixed_precision); - ITensorInfo *ans_info = FunctionType::create_op(sketch, &input_info, pool_attr, pool_settings); - GpuOutput::create_op(sketch, ans_info, &dst_info); + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, pool_attr, pool_settings); + GpuOutput::create_op(sketch, ans_info, dst_info); // Configure runtime ClWorkloadRuntime runtime; runtime.configure(sketch); // (Important) Allocate auxiliary tensor memory if there are any - for(auto &data : runtime.get_auxiliary_tensors()) + for (auto &data : runtime.get_auxiliary_tensors()) { CLTensor *tensor = std::get<0>(data); TensorInfo info = std::get<1>(data); @@ -121,8 +124,8 @@ protected: TensorType t_dst{}; // Initialize user tensors - t_input.allocator()->init(input_info); - t_dst.allocator()->init(dst_info); + t_input.allocator()->init(*input_info); + t_dst.allocator()->init(*dst_info); // Allocate and fill user tensors t_input.allocator()->allocate(); @@ -131,7 +134,7 @@ protected: fill(AccessorType(t_input), 0); // Run runtime - runtime.run({ &t_input, &t_dst }); + runtime.run({&t_input, &t_dst}); return t_dst; } @@ -149,36 +152,57 @@ protected: }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionGpuPool2dValidationFixture : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionGpuPool2dValidationFixture + : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - void setup(TensorShape input_shape, PoolingType pool_type, Size2D pool_size, Padding2D pad, Size2D stride, bool exclude_padding, DataType data_type) + void setup(TensorShape input_shape, + PoolingType pool_type, + Size2D pool_size, + Padding2D pad, + Size2D stride, + bool exclude_padding, + DataType data_type) { - DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, - Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding(exclude_padding), - data_type, false); + DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, + Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding( + exclude_padding), + data_type, false); } }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionGpuPool2dMixedPrecisionValidationFixture : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionGpuPool2dMixedPrecisionValidationFixture + : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: - void setup(TensorShape input_shape, PoolingType pool_type, Size2D pool_size, Padding2D pad, Size2D stride, bool exclude_padding, DataType data_type, bool mixed_precision) + void setup(TensorShape input_shape, + PoolingType pool_type, + Size2D pool_size, + Padding2D pad, + Size2D stride, + bool exclude_padding, + DataType data_type, + bool mixed_precision) { - DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, - Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding(exclude_padding), - data_type, mixed_precision); + DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, + Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding( + exclude_padding), + data_type, mixed_precision); } }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> -class DynamicFusionGpuPool2dSpecialValidationFixture : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +class DynamicFusionGpuPool2dSpecialValidationFixture + : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> { public: void setup(TensorShape input_shape, Pool2dAttributes pool_attr, DataType data_type) { - DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, pool_attr, data_type, false); + DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, pool_attr, data_type, false); } }; |