Diffstat (limited to 'tests/validation/fixtures/dynamic_fusion/gpu')
5 files changed, 1415 insertions, 0 deletions
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h new file mode 100644 index 0000000000..ca4de11a15 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/DepthwiseConvolutionLayer.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value || + std::is_same<typename std::decay<T>::type, int8_t>::value, + int32_t, + T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T + + void setup(TensorShape input_shape, + Size2D kernel_size, + const PadStrideInfo &pad_stride, + const Size2D &dilation, + const unsigned int depth_multiplier, + const DataType data_type, + const DataLayout data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != + DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout + + DepthwiseConv2dAttributes dwc_conv2d_attr; + const Padding2D padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), 
pad_stride.pad_top(), + pad_stride.pad_bottom()); + dwc_conv2d_attr.pad(padding_2d) + .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second)) + .dilation(dilation) + .depth_multiplier(depth_multiplier) + .dimension_rounding_type(pad_stride.round()); + + // Calculate Output and Weight Shapes + TensorShape weights_shape = TensorShape(kernel_size.width, kernel_size.height); + + const TensorInfo in_info(input_shape, 1, data_type); + const TensorInfo we_info(weights_shape, 1, data_type); + + const ConvolutionInfo info{pad_stride, depth_multiplier, ActivationLayerInfo(), dilation}; + const TensorShape output_shape = + misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info); + + weights_shape.set(2, output_shape.z()); + const TensorShape bias_shape = TensorShape(weights_shape[2]); + + _data_type = data_type; + _data_layout = data_layout; + _target = compute_target(input_shape, weights_shape, bias_shape, dwc_conv2d_attr); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, dwc_conv2d_attr); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + const DepthwiseConv2dAttributes dwc_conv2d_attr) + { + ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); + + // Our test shapes are assumed in NCHW data layout, thus the permutation + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, dwc_conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + 
t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + DepthwiseConv2dAttributes dwc_conv2d_attr) + { + // Create reference + SimpleTensor<T> src{input_shape, _data_type, 1}; + SimpleTensor<T> weight{weights_shape, _data_type, 1}; + SimpleTensor<TBias> bias{bias_shape, _data_type, 1}; + + fill(src, 0); + fill(weight, 1); + fill(bias, 2); + + auto src_nchw = src; + auto weights_nchw = weight; + auto bias_nchw = bias; + auto output_shape_nchw = output_shape; + + PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(), + dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right, + dwc_conv2d_attr.pad().top, dwc_conv2d_attr.pad().bottom, + DimensionRoundingType{}); + auto dst_nchw = + reference::depthwise_convolution(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride, + dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation()); + return dst_nchw; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataLayout _data_layout{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuDepthwiseConv2dValidationFixture + : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + Size2D kernel_size, + const PadStrideInfo &info, + const Size2D &dilation, + const unsigned int depth_multiplier, + DataType data_type, + DataLayout data_layout) + { + DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, kernel_size, info, dilation, depth_multiplier, data_type, data_layout); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h new file mode 100644 index 0000000000..1f4e223b93 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +template <typename U> +void fill(U &&tensor, int i) +{ + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } +} + +} // namespace + +/** General Conv2d fixture + * Adapted from tests/validation/fixtures/ConvolutionLayerFixture.h + * TODO: Parameterize to be fully backend agnostic: COMPMID-5760; remove Gpu from name + */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value || + std::is_same<typename std::decay<T>::type, int8_t>::value, + int32_t, + T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T + + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + const PadStrideInfo &info, + const Size2D &dilation, + DataType data_type, + DataLayout data_layout, + QuantizationInfo quantization_info, + QuantizationInfo weight_quantization_info) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout + const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, dilation); + _data_type = data_type; + _data_layout = data_layout; + _is_quantized = is_data_type_quantized_asymmetric(data_type); + _quantization_info = quantization_info; + _weight_quantization_info = weight_quantization_info; + _bias_data_type = _is_quantized ? 
DataType::S32 : data_type; + _target = compute_target(input_shape, weights_shape, bias_shape, conv2d_attr); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr); + } + +protected: + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + Conv2dAttributes conv2d_attr) + { + ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + CLScheduler::get().default_reinit(); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + Conv2dAttributes conv2d_attr) + { + // Create reference + SimpleTensor<T> src{input_shape, _data_type, 1, _quantization_info}; + SimpleTensor<T> weight{weights_shape, _data_type, 1, _weight_quantization_info}; + SimpleTensor<TBias> bias{bias_shape, _data_type, 1, _quantization_info}; + + fill(src, 0); + fill(weight, 1); + fill(bias, 2); + + auto src_nchw = src; + auto weights_nchw = weight; + auto bias_nchw = bias; + auto output_shape_nchw = output_shape; + + PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left, + conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom, + DimensionRoundingType{}); + auto dst_nchw = reference::convolution_layer(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, + legacy_pad_stride, conv2d_attr.dilation()); + 
return dst_nchw; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataType _bias_data_type{}; + DataLayout _data_layout{}; + QuantizationInfo _quantization_info{}; + QuantizationInfo _weight_quantization_info{}; + bool _is_quantized = false; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuConv2dValidationFixture + : public DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape output_shape, + TensorShape bias_shape, + const PadStrideInfo &info, + const Size2D &dilation, + DataType data_type, + DataLayout data_layout, + QuantizationInfo quantization_info) + { + DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, weights_shape, output_shape, bias_shape, info, dilation, data_type, data_layout, + quantization_info, quantization_info); + } +}; + +/** Specific Conv2d method: Direct Conv2d fixture + * Adapted from tests/validation/fixtures/DirectConvolutionLayerFixture.h + * TODO: Parameterize to be fully backend agnostic: COMPMID-5760 + */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionDirectConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = + typename std::conditional<std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T>::type; + + void setup(TensorShape input_shape, + int stride_x, + int stride_y, + int pad_x, + int pad_y, + unsigned int kernel_size, + unsigned int num_kernels, + DataType data_type, + QuantizationInfo quantization_info, + DataLayout data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout + + TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels); + const TensorShape bias_shape(num_kernels); + const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR); + const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ?
DataType::S32 : data_type; + + const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, {1U, 1U} /* dilation */); + + TensorInfo input_info = TensorInfo(input_shape, 1, data_type); + TensorInfo weights_info = TensorInfo(weights_shape, 1, data_type); + + const TensorShape output_shape = + misc::shape_calculator::compute_deep_convolution_shape(input_info, weights_info, info); + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr, data_type, + bias_data_type, quantization_info, data_layout); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, + bias_data_type, quantization_info); + } + +protected: + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const Conv2dAttributes &conv2d_attr, + DataType data_type, + DataType bias_data_type, + QuantizationInfo quantization_info, + const DataLayout &data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); + ARM_COMPUTE_UNUSED(quantization_info); + // Dataset shapes are in NCHW layout + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, data_layout)); + auto weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, data_type, data_layout)); + auto bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, bias_data_type, data_layout)); + auto dst_info = context.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); + + ARM_COMPUTE_ASSERT(t_input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_weight.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable()); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!t_input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_weight.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable()); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); + return 
t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + const PadStrideInfo &info, + DataType data_type, + DataType bias_data_type, + QuantizationInfo quantization_info) + { + // Create reference + SimpleTensor<T> src{input_shape, data_type, 1, quantization_info}; + SimpleTensor<T> weights{weights_shape, data_type, 1, quantization_info}; + SimpleTensor<TBias> bias{bias_shape, bias_data_type, 1, quantization_info}; + + // Fill reference + fill(src, 0); + fill(weights, 1); + fill(bias, 2); + + SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info); + return dst; + } + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionDirectConv2dValidationFixture + : public DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + int stride_x, + int stride_y, + int pad_x, + int pad_y, + unsigned int kernel_size, + unsigned int num_kernels, + DataType data_type, + DataLayout data_layout) + { + DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, QuantizationInfo(), + data_layout); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h new file mode 100644 index 0000000000..69bd0efbdc --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/ElementwiseOperations.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framework::Fixture +{ +public: + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops = false) + { + _ref_op = ref_op; + _is_inplace = is_inplace; + _data_type = data_type; + _fuse = fuse_two_ops; + ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops."); + ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet."); + _target = compute_target(shape0, shape1, shape2); + _reference = compute_reference(shape0, shape1, shape2); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + if (is_data_type_float(tensor.data_type())) + { + switch (_ref_op) + { + case ArithmeticOperation::DIV: + library->fill_tensor_uniform_ranged(tensor, i, {std::pair<float, float>(-0.001f, 0.001f)}); + break; + case ArithmeticOperation::POWER: + library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f); + break; + default: + library->fill_tensor_uniform(tensor, i); + } + } + else if (tensor.data_type() == DataType::S32) + { + switch (_ref_op) + { + case ArithmeticOperation::DIV: + library->fill_tensor_uniform_ranged(tensor, i, {std::pair<int32_t, int32_t>(-1U, 1U)}); + break; + default: + library->fill_tensor_uniform(tensor, i); + } + } + else + { + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Fuse first element wise binary Op + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *rhs_info_fuse = nullptr; + + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info); + + if (_fuse) + { + rhs_info_fuse = context.create_tensor_info(TensorInfo(shape2, 1, _data_type)); + ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, rhs_info_fuse); + GpuOutput::create_op(sketch, ans2_info, dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_info, dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there 
are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_rhs_fuse{}; + TensorType t_dst{}; + + // Initialize user tensors + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); + if (_fuse) + { + t_rhs_fuse.allocator()->init(*rhs_info_fuse); + } + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + t_dst.allocator()->allocate(); + if (_fuse) + { + t_rhs_fuse.allocator()->allocate(); + } + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + if (_fuse) + { + fill(AccessorType(t_rhs_fuse), 2); + } + + // Run runtime + if (_fuse) + { + runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst}); + } + else + { + runtime.run({&t_lhs, &t_rhs, &t_dst}); + } + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + const TensorShape out_shape_fuse = TensorShape::broadcast_shape(out_shape, shape2); + + // Create reference + SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_dst{out_shape, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_dst_fuse{out_shape_fuse, _data_type, 1, QuantizationInfo()}; + + // Fill reference + fill(ref_lhs, 0); + fill(ref_rhs, 1); + + reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP); + if (_fuse) + { + fill(ref_rhs_fuse, 2); + reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP); + } + SimpleTensor<T> *ret = _fuse ?
&ref_dst_fuse : &ref_dst; + return *ret; + } + + ArithmeticOperation _ref_op{ArithmeticOperation::ADD}; + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataLayout _data_layout{}; + bool _is_inplace{false}; + bool _fuse{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape0, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type, + bool is_inplace) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape1, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h new file mode 100644 index 0000000000..4c1cc94d3d --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/GEMM.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/ReshapeLayer.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +template <typename U> +void fill(U &&tensor, int i) +{ + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } +} + +} // namespace +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuMatMulValidationGenericFixture : public framework::Fixture +{ +public: + void setup(TensorShape lhs_shape, + TensorShape rhs_shape, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) + { + //For brevity, the input shapes are assumed to be not-transposed for both a and b matrices. + if (transpose_a) + { + permute(lhs_shape, PermutationVector(1U, 0U)); + } + if (transpose_b) + { + permute(rhs_shape, PermutationVector(1U, 0U)); + } + + // Skip configurations unsupported by the device. + _device_supports_export_to_cl_image = image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); + if (!_device_supports_export_to_cl_image && export_rhs_to_cl_image) + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs. 
+ } + + _target = compute_target(lhs_shape, rhs_shape, transpose_a, transpose_b, M0, N0, K0, export_rhs_to_cl_image, + data_type); + _reference = compute_reference(lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, data_type); + } + +protected: + TensorType compute_target(TensorShape &shape_a, + TensorShape &shape_b, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) + { + ARM_COMPUTE_UNUSED(export_rhs_to_cl_image); + CLScheduler::get().default_reinit(); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape_a, 1, data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape_b, 1, data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); + + MatMulAttributes matmul_attr{}; + matmul_attr.adj_lhs(transpose_a); + matmul_attr.adj_rhs(transpose_b); + + GpuMatMulSettings matmul_settings{}; + matmul_settings.m0(M0); + matmul_settings.n0(N0); + matmul_settings.k0(K0); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_dst{}; + + // Initialize user tensors + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); + + ARM_COMPUTE_ASSERT(t_lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable()); + + // Allocate and fill user tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + t_dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!t_lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable()); + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + + // Run runtime + runtime.run({&t_lhs, &t_rhs, &t_dst}); + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape_a, + const TensorShape &shape_b, + const TensorShape &output_shape, + bool pretranspose_a, + bool pretranspose_b, + DataType data_type) + { + // We collapse dimensions > 3 onto dimension 3, i.e. 
5D+ tensors will look like 3D + // This is necessary unless we choose to extend gemm reference for 5D+ tensors + TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ); + TensorShape shape_a_collapsed = shape_a.collapsed_from(Window::DimZ); + TensorShape shape_b_collapsed = shape_b.collapsed_from(Window::DimZ); + + // Create reference + SimpleTensor<T> a{shape_a_collapsed, data_type, 1}; + SimpleTensor<T> b{shape_b_collapsed, data_type, 1}; + SimpleTensor<T> c{output_shape_collapsed, data_type, 1}; + + // Fill reference + fill(a, 0); + fill(b, 1); + + /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M), + therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K) + in order to be able to call reference implementation that works with (B x M x K) input. + Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. */ + + // Define transposed shapes + TensorShape a_transposed_shape(a.shape()); + a_transposed_shape.set(0, a.shape().y()); + a_transposed_shape.set(1, a.shape().x()); + + TensorShape b_transposed_shape(b.shape()); + b_transposed_shape.set(0, b.shape().y()); + b_transposed_shape.set(1, b.shape().x()); + + // Define transposed tensors + SimpleTensor<T> a_transposed{a_transposed_shape, data_type}; + SimpleTensor<T> b_transposed{b_transposed_shape, data_type}; + + //pretranspose a if necessary + if (pretranspose_a) + { + a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U)); + } + + // pretranspose b if necessary + if (pretranspose_b) + { + b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U)); + } + + // Use transposed tensors if boolean enabled else use original tensors + SimpleTensor<T> result = + reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? 
b_transposed : b, c, 1.0f, 0.f); + + // We reshape the gemm output back if the tensor is high dimensional + if (output_shape_collapsed != output_shape) + { + result = reference::reshape_layer(result, output_shape); + } + + return result; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + bool _device_supports_export_to_cl_image{false}; + bool _device_supports_mmul{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuMatMulValidationFixture + : public DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape lhs_shape, + TensorShape rhs_shape, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) + { + ARM_COMPUTE_UNUSED(export_rhs_to_cl_image); + DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, M0, N0, K0, + false /* export_rhs_to_cl_image */, data_type); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h new file mode 100644 index 0000000000..b0c7143d91 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" + +#include "src/dynamic_fusion/utils/Utils.h" +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/PoolingLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuPool2dValidationGenericFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, const Pool2dAttributes &pool_attr, DataType data_type) + { + _target = compute_target(input_shape, pool_attr, data_type); + _reference = compute_reference( + input_shape, convert_pool_attr_to_pool_info(pool_attr, true /* mixed_precision */), data_type); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, const Pool2dAttributes &pool_attr, const DataType data_type) + { + CLScheduler::get().default_reinit(); + + // Change shape due to NHWC data layout, test shapes are NCHW + permute(input_shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, DataLayout::NHWC)); + auto dst_info = context.create_tensor_info(); + + // Create Pool2dSettings + GpuPool2dSettings pool_settings = GpuPool2dSettings(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, pool_attr, pool_settings); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + 
t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + + // Run runtime + runtime.run({&t_input, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type) + { + // Create reference + SimpleTensor<T> src(shape, data_type, 1, QuantizationInfo()); + // Fill reference + fill(src, 0); + return reference::pooling_layer<T>(src, pool_info, QuantizationInfo(), nullptr, DataLayout::NCHW); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuPool2dValidationFixture + : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + PoolingType pool_type, + Size2D pool_size, + Padding2D pad, + Size2D stride, + bool exclude_padding, + DataType data_type) + { + DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, + Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding( + exclude_padding), + data_type); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuPool2dSpecialValidationFixture + : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, Pool2dAttributes pool_attr, DataType data_type) + { + DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, pool_attr, data_type); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H
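
All five fixtures in this patch drive the experimental dynamic fusion API through the same flow: describe the operator graph on a GpuWorkloadSketch, terminate it with GpuOutput, compile it into a ClWorkloadRuntime, allocate any auxiliary tensors the runtime requests, then bind user tensors and run. A minimal standalone sketch of that shared flow, distilled from the fixtures above (GpuAdd and the F32 shape are illustrative stand-ins for the fused operators under test; error handling and validation are omitted):

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

// Illustrative driver for the sketch-and-run pattern the fixtures share.
void run_fused_add(const TensorShape &shape)
{
    // Describe the operator graph on sketch tensor infos
    auto               cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
    GpuWorkloadContext context{&cl_compile_ctx};
    GpuWorkloadSketch  sketch{&context};

    ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape, 1, DataType::F32));
    ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape, 1, DataType::F32));
    ITensorInfo *dst_info = context.create_tensor_info(); // shape deduced by the sketch

    ITensorInfo *ans_info = GpuAdd::create_op(sketch, lhs_info, rhs_info);
    GpuOutput::create_op(sketch, ans_info, dst_info);

    // Compile the sketch into a runnable workload
    ClWorkloadRuntime runtime;
    runtime.configure(sketch);

    // Allocate any auxiliary tensors the runtime requests (same loop as the fixtures)
    for (auto &data : runtime.get_auxiliary_tensors())
    {
        CLTensor     *tensor      = std::get<0>(data);
        TensorInfo    info        = std::get<1>(data);
        AuxMemoryInfo aux_mem_req = std::get<2>(data);
        tensor->allocator()->init(info, aux_mem_req.alignment);
        tensor->allocator()->allocate();
    }

    // Bind user tensors and execute
    CLTensor t_lhs, t_rhs, t_dst;
    t_lhs.allocator()->init(*lhs_info);
    t_rhs.allocator()->init(*rhs_info);
    t_dst.allocator()->init(*dst_info);
    t_lhs.allocator()->allocate();
    t_rhs.allocator()->allocate();
    t_dst.allocator()->allocate();
    runtime.run({&t_lhs, &t_rhs, &t_dst});
}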
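
The MatMul fixture's skip comment already points at how these classes are consumed: a FIXTURE_DATA_TEST_CASE subclasses the fixture, feeds setup() one dataset row at a time, and compares the protected _target and _reference members. A hypothetical registration for the Pool2d fixture, following the usual ACL validation conventions (suite names, dataset values, and tolerance are illustrative and not part of this patch; only the fixture's setup() signature is):

#include "tests/CL/CLAccessor.h"
#include "tests/framework/datasets/Datasets.h"
#include "tests/framework/Macros.h"
#include "tests/validation/Validation.h"

using namespace arm_compute;
using namespace arm_compute::test;
using namespace arm_compute::test::validation;
using namespace arm_compute::experimental::dynamic_fusion;

namespace
{
// Illustrative tolerance for the F32 reference comparison
const RelativeTolerance<float> tolerance_f32(0.001f);
} // namespace

TEST_SUITE(CL)
TEST_SUITE(DYNAMIC_FUSION)
TEST_SUITE(POOL2D)

using CLDynamicFusionPool2dFixture =
    DynamicFusionGpuPool2dValidationFixture<CLTensor, CLAccessor, GpuPool2d, float>;

FIXTURE_DATA_TEST_CASE(RunSmall,
                       CLDynamicFusionPool2dFixture,
                       framework::DatasetMode::ALL,
                       combine(framework::dataset::make("InputShape", {TensorShape(27U, 13U, 2U)}),
                               framework::dataset::make("PoolType", {PoolingType::MAX}),
                               framework::dataset::make("PoolSize", {Size2D(3, 3)}),
                               framework::dataset::make("Pad", {Padding2D()}),
                               framework::dataset::make("Stride", {Size2D(1, 1)}),
                               framework::dataset::make("ExcludePadding", {true}),
                               framework::dataset::make("DataType", {DataType::F32})))
{
    // _target and _reference are populated by the fixture's setup()
    validate(CLAccessor(_target), _reference, tolerance_f32);
}

TEST_SUITE_END() // POOL2D
TEST_SUITE_END() // DYNAMIC_FUSION
TEST_SUITE_END() // CL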