diff options
Diffstat (limited to 'tests/validation/fixtures/dynamic_fusion')
12 files changed, 2785 insertions, 0 deletions
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h new file mode 100644 index 0000000000..ca4de11a15 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/DepthwiseConvolutionLayer.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value || + std::is_same<typename std::decay<T>::type, int8_t>::value, + int32_t, + T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T + + void setup(TensorShape input_shape, + Size2D kernel_size, + const PadStrideInfo &pad_stride, + const Size2D &dilation, + const unsigned int depth_multiplier, + const DataType data_type, + const DataLayout data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != + DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout + + DepthwiseConv2dAttributes dwc_conv2d_attr; + const Padding2D 
padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), pad_stride.pad_top(), + pad_stride.pad_bottom()); + dwc_conv2d_attr.pad(padding_2d) + .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second)) + .dilation(dilation) + .depth_multiplier(depth_multiplier) + .dimension_rounding_type(pad_stride.round()); + + // Calculate Output and Weight Shapes + TensorShape weights_shape = TensorShape(kernel_size.width, kernel_size.height); + + const TensorInfo in_info(input_shape, 1, data_type); + const TensorInfo we_info(weights_shape, 1, data_type); + + const ConvolutionInfo info{pad_stride, depth_multiplier, ActivationLayerInfo(), dilation}; + const TensorShape output_shape = + misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info); + + weights_shape.set(2, output_shape.z()); + const TensorShape bias_shape = TensorShape(weights_shape[2]); + + _data_type = data_type; + _data_layout = data_layout; + _target = compute_target(input_shape, weights_shape, bias_shape, dwc_conv2d_attr); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, dwc_conv2d_attr); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + const DepthwiseConv2dAttributes dwc_conv2d_attr) + { + ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); + + // Our test shapes are assumed in NCHW data layout, thus the permutation + permute(input_shape, 
PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, dwc_conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_bias, 
&t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + DepthwiseConv2dAttributes dwc_conv2d_attr) + { + // Create reference + SimpleTensor<T> src{input_shape, _data_type, 1}; + SimpleTensor<T> weight{weights_shape, _data_type, 1}; + SimpleTensor<TBias> bias{bias_shape, _data_type, 1}; + + fill(src, 0); + fill(weight, 1); + fill(bias, 2); + + auto src_nchw = src; + auto weights_nchw = weight; + auto bias_nchw = bias; + auto output_shape_nchw = output_shape; + + PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(), + dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right, + dwc_conv2d_attr.pad().top, dwc_conv2d_attr.pad().bottom, + DimensionRoundingType{}); + auto dst_nchw = + reference::depthwise_convolution(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride, + dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation()); + return dst_nchw; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataLayout _data_layout{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuDepthwiseConv2dValidationFixture + : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + Size2D kernel_size, + const PadStrideInfo &info, + const Size2D &dilation, + const unsigned int depth_multiplier, + DataType data_type, + DataLayout data_layout) + { + DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, kernel_size, info, dilation, depth_multiplier, data_type, data_layout); + } +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // 
ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h new file mode 100644 index 0000000000..1f4e223b93 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/ConvolutionLayer.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +template <typename U> +void fill(U &&tensor, int i) +{ + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } +} + +} // namespace + +/** General Conv2d fixture + * Adapted from tests/validation/fixtures/ConvolutionLayerFixture.h + * TODO: Parameterize to be fully backend agnostic: COMPMID-5760; remove Gpu from name + */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuConv2dValidationGenericFixture : public framework::Fixture +{ +public: + 
using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value || + std::is_same<typename std::decay<T>::type, int8_t>::value, + int32_t, + T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T + + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape bias_shape, + TensorShape output_shape, + const PadStrideInfo &info, + const Size2D &dilation, + DataType data_type, + DataLayout data_layout, + QuantizationInfo quantization_info, + QuantizationInfo weight_quantization_info) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout + const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, dilation); + _data_type = data_type; + _data_layout = data_layout; + _is_quantized = is_data_type_quantized_asymmetric(data_type); + _quantization_info = quantization_info; + _weight_quantization_info = weight_quantization_info; + _bias_data_type = _is_quantized ? 
DataType::S32 : data_type; + _target = compute_target(input_shape, weights_shape, bias_shape, conv2d_attr); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr); + } + +protected: + // Given input is in nchw format + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + Conv2dAttributes conv2d_attr) + { + ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + CLScheduler::get().default_reinit(); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *input_info = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + ITensorInfo *bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize 
user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + Conv2dAttributes conv2d_attr) + { + // Create reference + SimpleTensor<T> src{input_shape, _data_type, 1, _quantization_info}; + SimpleTensor<T> weight{weights_shape, _data_type, 1, _weight_quantization_info}; + SimpleTensor<TBias> bias{bias_shape, _data_type, 1, _quantization_info}; + + fill(src, 0); + fill(weight, 1); + fill(bias, 2); + + auto src_nchw = src; + auto weights_nchw = weight; + auto bias_nchw = bias; + auto output_shape_nchw = output_shape; + + PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left, + conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom, + DimensionRoundingType{}); + auto dst_nchw = reference::convolution_layer(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, + legacy_pad_stride, conv2d_attr.dilation()); + return dst_nchw; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataType _bias_data_type{}; + DataLayout _data_layout{}; + QuantizationInfo _quantization_info{}; + QuantizationInfo _weight_quantization_info{}; + bool _is_quantized = false; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuConv2dValidationFixture + : public 
DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + TensorShape weights_shape, + TensorShape output_shape, + TensorShape bias_shape, + const PadStrideInfo &info, + const Size2D &dialation, + DataType data_type, + DataLayout data_layout, + QuantizationInfo quantization_info) + { + DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, weights_shape, output_shape, bias_shape, info, dialation, data_type, data_layout, + quantization_info, quantization_info); + } +}; + +/** Specific Conv2d method: Direct Conv2d fixture + * Adapted from tests/validation/fixtures/DirectConvolutionLayerFixture.h + * TODO: Parameterize to be fully backend agnostic: COMPMID-5760 + */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionDirectConv2dValidationGenericFixture : public framework::Fixture +{ +public: + using TBias = + typename std::conditional<std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T>::type; + + void setup(TensorShape input_shape, + int stride_x, + int stride_y, + int pad_x, + int pad_y, + unsigned int kernel_size, + unsigned int num_kernels, + DataType data_type, + QuantizationInfo quantization_info, + DataLayout data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout + + TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels); + const TensorShape bias_shape(num_kernels); + const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR); + const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? 
DataType::S32 : data_type; + + const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, {1U, 1U} /* dilation */); + + TensorInfo input_info = TensorInfo(input_shape, 1, data_type); + TensorInfo weights_info = TensorInfo(weights_shape, 1, data_type); + + const TensorShape output_shape = + misc::shape_calculator::compute_deep_convolution_shape(input_info, weights_info, info); + + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr, data_type, + bias_data_type, quantization_info, data_layout); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, + bias_data_type, quantization_info); + } + +protected: + TensorType compute_target(TensorShape input_shape, + TensorShape weights_shape, + const TensorShape &bias_shape, + TensorShape output_shape, + const Conv2dAttributes &conv2d_attr, + DataType data_type, + DataType bias_data_type, + QuantizationInfo quantization_info, + const DataLayout &data_layout) + { + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); + ARM_COMPUTE_UNUSED(quantization_info); + // Dataset shapes are in NCHW layout + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, data_layout)); + auto weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, data_type, data_layout)); + auto bias_info = context.create_tensor_info(TensorInfo(bias_shape, 1, bias_data_type, data_layout)); + auto dst_info = context.create_tensor_info(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr); + 
GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_weight{}; + TensorType t_bias{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_weight.allocator()->init(*weight_info); + t_bias.allocator()->init(*bias_info); + t_dst.allocator()->init(*dst_info); + + ARM_COMPUTE_ASSERT(t_input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_weight.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable()); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_weight.allocator()->allocate(); + t_bias.allocator()->allocate(); + t_dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!t_input.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_weight.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_bias.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable()); + + fill(AccessorType(t_input), 0); + fill(AccessorType(t_weight), 1); + fill(AccessorType(t_bias), 2); + + // Run runtime + runtime.run({&t_input, &t_weight, &t_bias, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &input_shape, + const TensorShape &weights_shape, + const TensorShape &bias_shape, + const TensorShape &output_shape, + const PadStrideInfo &info, + DataType data_type, + DataType bias_data_type, + QuantizationInfo quantization_info) + { + // Create reference + SimpleTensor<T> src{input_shape, data_type, 1, quantization_info}; + SimpleTensor<T> weights{weights_shape, 
data_type, 1, quantization_info}; + SimpleTensor<TBias> bias{bias_shape, bias_data_type, 1, quantization_info}; + + // Fill reference + fill(src, 0); + fill(weights, 1); + fill(bias, 2); + + SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info); + return dst; + } + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionDirectConv2dValidationFixture + : public DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + int stride_x, + int stride_y, + int pad_x, + int pad_y, + unsigned int kernel_size, + unsigned int num_kernels, + DataType data_type, + DataLayout data_layout) + { + DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, QuantizationInfo(), + data_layout); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h new file mode 100644 index 0000000000..69bd0efbdc --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/ElementwiseOperations.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framework::Fixture +{ +public: + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops = false) + { + _ref_op = ref_op; + _is_inplace = is_inplace; + _data_type = data_type; + _fuse = fuse_two_ops; + ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops."); + ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet."); + _target = compute_target(shape0, shape1, shape2); + _reference = compute_reference(shape0, shape1, shape2); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + if (is_data_type_float(tensor.data_type())) + { + switch (_ref_op) + { + case ArithmeticOperation::DIV: + library->fill_tensor_uniform_ranged(tensor, i, {std::pair<float, float>(-0.001f, 0.001f)}); + break; + case ArithmeticOperation::POWER: + library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f); + break; + 
default: + library->fill_tensor_uniform(tensor, i); + } + } + else if (tensor.data_type() == DataType::S32) + { + switch (_ref_op) + { + case ArithmeticOperation::DIV: + library->fill_tensor_uniform_ranged(tensor, i, {std::pair<int32_t, int32_t>(-1U, 1U)}); + break; + default: + library->fill_tensor_uniform(tensor, i); + } + } + else + { + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Fuse first element wise binary Op + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *rhs_info_fuse = nullptr; + + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info); + + if (_fuse) + { + rhs_info_fuse = context.create_tensor_info(TensorInfo(shape2, 1, _data_type)); + ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, rhs_info_fuse); + GpuOutput::create_op(sketch, ans2_info, dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_info, dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_rhs_fuse{}; + TensorType t_dst{}; + + 
// Initialize user tensors + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); + if (_fuse) + { + t_rhs_fuse.allocator()->init(*rhs_info_fuse); + } + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + t_dst.allocator()->allocate(); + if (_fuse) + { + t_rhs_fuse.allocator()->allocate(); + } + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + if (_fuse) + { + fill(AccessorType(t_rhs_fuse), 2); + } + + // Run runtime + if (_fuse) + { + runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst}); + } + else + { + runtime.run({&t_lhs, &t_rhs, &t_dst}); + } + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + const TensorShape out_shape_fuse = TensorShape::broadcast_shape(out_shape, shape1); + + // Create reference + SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_dst{out_shape, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_dst_fuse{out_shape_fuse, _data_type, 1, QuantizationInfo()}; + + // Fill reference + fill(ref_lhs, 0); + fill(ref_rhs, 1); + + reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP); + if (_fuse) + { + fill(ref_rhs_fuse, 2); + reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP); + } + SimpleTensor<T> *ret = _fuse ? 
&ref_dst_fuse : &ref_dst; + return *ret; + } + + ArithmeticOperation _ref_op{ArithmeticOperation::ADD}; + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataLayout _data_layout{}; + bool _is_inplace{false}; + bool _fuse{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape0, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + DataType data_type, + bool is_inplace) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape1, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture + : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(ArithmeticOperation ref_op, + const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops) + { + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, 
AccessorType, FunctionType, T>::setup( + ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h new file mode 100644 index 0000000000..4c1cc94d3d --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/Helpers.h" +#include "tests/validation/reference/GEMM.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/ReshapeLayer.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +template <typename U> +void fill(U &&tensor, int i) +{ + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } +} + +} // namespace +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuMatMulValidationGenericFixture : public framework::Fixture +{ +public: + void setup(TensorShape lhs_shape, + TensorShape rhs_shape, + TensorShape output_shape, + bool transpose_a, + bool 
transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) + { + //For brevity, the input shapes are assumed to be not-transposed for both a and b matrices. + if (transpose_a) + { + permute(lhs_shape, PermutationVector(1U, 0U)); + } + if (transpose_b) + { + permute(rhs_shape, PermutationVector(1U, 0U)); + } + + // Skip configurations unsupported by the device. + _device_supports_export_to_cl_image = image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); + if (!_device_supports_export_to_cl_image && export_rhs_to_cl_image) + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs. + } + + _target = compute_target(lhs_shape, rhs_shape, transpose_a, transpose_b, M0, N0, K0, export_rhs_to_cl_image, + data_type); + _reference = compute_reference(lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, data_type); + } + +protected: + TensorType compute_target(TensorShape &shape_a, + TensorShape &shape_b, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) + { + ARM_COMPUTE_UNUSED(export_rhs_to_cl_image); + CLScheduler::get().default_reinit(); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape_a, 1, data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape_b, 1, data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); + + MatMulAttributes matmul_attr{}; + matmul_attr.adj_lhs(transpose_a); + matmul_attr.adj_rhs(transpose_b); + + GpuMatMulSettings matmul_settings{}; + matmul_settings.m0(M0); + matmul_settings.n0(N0); 
+ matmul_settings.k0(K0); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_dst{}; + + // Initialize user tensors + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); + + ARM_COMPUTE_ASSERT(t_lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable()); + + // Allocate and fill user tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + t_dst.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!t_lhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_rhs.info()->is_resizable()); + ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable()); + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + + // Run runtime + runtime.run({&t_lhs, &t_rhs, &t_dst}); + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape_a, + const TensorShape &shape_b, + const TensorShape &output_shape, + bool pretranspose_a, + bool pretranspose_b, + DataType data_type) + { + // We collapse dimensions > 3 onto dimension 3, i.e. 
5D+ tensors will look like 3D + // This is necessary unless we choose to extend gemm reference for 5D+ tensors + TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ); + TensorShape shape_a_collapsed = shape_a.collapsed_from(Window::DimZ); + TensorShape shape_b_collapsed = shape_b.collapsed_from(Window::DimZ); + + // Create reference + SimpleTensor<T> a{shape_a_collapsed, data_type, 1}; + SimpleTensor<T> b{shape_b_collapsed, data_type, 1}; + SimpleTensor<T> c{output_shape_collapsed, data_type, 1}; + + // Fill reference + fill(a, 0); + fill(b, 1); + + /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M), + therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K) + in order to be able to call reference implementation that works with (B x M x K) input. + Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. */ + + // Define transposed shapes + TensorShape a_transposed_shape(a.shape()); + a_transposed_shape.set(0, a.shape().y()); + a_transposed_shape.set(1, a.shape().x()); + + TensorShape b_transposed_shape(b.shape()); + b_transposed_shape.set(0, b.shape().y()); + b_transposed_shape.set(1, b.shape().x()); + + // Define transposed tensors + SimpleTensor<T> a_transposed{a_transposed_shape, data_type}; + SimpleTensor<T> b_transposed{b_transposed_shape, data_type}; + + //pretranspose a if necessary + if (pretranspose_a) + { + a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U)); + } + + // pretranspose b if necessary + if (pretranspose_b) + { + b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U)); + } + + // Use transposed tensors if boolean enabled else use original tensors + SimpleTensor<T> result = + reference::gemm<T>((pretranspose_a) ? 
a_transposed : a, (pretranspose_b) ? b_transposed : b, c, 1.0f, 0.f); + + // We reshape the gemm output back if the tensor is high dimensional + if (output_shape_collapsed != output_shape) + { + // std::cout << "called reshape: \n"; + result = reference::reshape_layer(result, output_shape); + } + + return result; + } + + CLTensor _target{}; + SimpleTensor<T> _reference{}; + bool _device_supports_export_to_cl_image{false}; + bool _device_supports_mmul{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuMatMulValidationFixture + : public DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape lhs_shape, + TensorShape rhs_shape, + TensorShape output_shape, + bool transpose_a, + bool transpose_b, + int M0, + int N0, + int K0, + bool export_rhs_to_cl_image, + DataType data_type) + { + ARM_COMPUTE_UNUSED(export_rhs_to_cl_image); + DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, M0, N0, K0, + false /* export_rhs_to_cl_image bias */, data_type); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h new file mode 100644 index 0000000000..b0c7143d91 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" + +#include "src/dynamic_fusion/utils/Utils.h" +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/PoolingLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuPool2dValidationGenericFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, const Pool2dAttributes &pool_attr, DataType data_type) + { + _target = compute_target(input_shape, pool_attr, data_type); + _reference = compute_reference( + input_shape, convert_pool_attr_to_pool_info(pool_attr, true /* mixed_precision */), data_type); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + switch (tensor.data_type()) + { + case DataType::F16: + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f}; + library->fill(tensor, distribution, i); + break; + } + case DataType::F32: + { + std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } + } + + // Given 
input is in nchw format + TensorType compute_target(TensorShape input_shape, const Pool2dAttributes &pool_attr, const DataType data_type) + { + CLScheduler::get().default_reinit(); + + // Change shape due to NHWC data layout, test shapes are NCHW + permute(input_shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, DataLayout::NHWC)); + auto dst_info = context.create_tensor_info(); + + // Create Pool2dSettings + GpuPool2dSettings pool_settings = GpuPool2dSettings(); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, pool_attr, pool_settings); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType t_input{}; + TensorType t_dst{}; + + // Initialize user tensors + t_input.allocator()->init(*input_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_input.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_input), 0); + + // Run runtime + runtime.run({&t_input, &t_dst}); + return t_dst; + } + + SimpleTensor<T> compute_reference(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type) + { + // Create reference + SimpleTensor<T> src(shape, data_type, 1, QuantizationInfo()); + // Fill reference + 
fill(src, 0); + return reference::pooling_layer<T>(src, pool_info, QuantizationInfo(), nullptr, DataLayout::NCHW); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuPool2dValidationFixture + : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, + PoolingType pool_type, + Size2D pool_size, + Padding2D pad, + Size2D stride, + bool exclude_padding, + DataType data_type) + { + DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, + Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding( + exclude_padding), + data_type); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuPool2dSpecialValidationFixture + : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape input_shape, Pool2dAttributes pool_attr, DataType data_type) + { + DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + input_shape, pool_attr, data_type); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h new file mode 100644 index 0000000000..c9ffbccbc7 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/ActivationLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename... 
TArgs> +class DynamicFusionActivationValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, bool fuse, DataType data_type, ActivationLayerInfo act_info, TArgs... args) + { + _fuse = fuse; + _data_type = data_type; + _function = act_info.activation(); + _target = compute_target(shape, args...); + _reference = compute_reference(shape, act_info); + } + +protected: + std::vector<T> get_boundary_values(T min, T max) + { + // This function will return a vector filled with the following values that can + // represent two partitions derived from equivalent partitioning. + // * Lower partition: min, min + delta, lower quarter (nominal), center - delta + // * Upper partition: center, center + delta, upper quarter (nominal), max - delta, max + const auto delta = is_data_type_float(_data_type) ? T(0.1f) : T(1); + const auto center_value = (min + max) / 2; + const auto lower_quarter = (min + center_value) / 2; + const auto upper_quarter = (center_value + max) / 2; + + std::vector<T> boundary_values{}; + + // To ensure all the inserted values are within the given range after subtracing/adding delta + auto insert_values = [&boundary_values, &min, &max](const std::initializer_list<T> &new_values) + { + for (auto &v : new_values) + { + if (v >= min && v <= max) + { + boundary_values.emplace_back(v); + } + } + }; + + insert_values({min, static_cast<T>(min + delta), static_cast<T>(lower_quarter), + static_cast<T>(center_value - delta)}); // lower partition + insert_values({static_cast<T>(center_value), static_cast<T>(center_value + delta), + static_cast<T>(upper_quarter), static_cast<T>(max - delta), max}); // upper partition + + return boundary_values; + } + + template <typename U> + void fill(U &&tensor) + { + float min_bound = 0; + float max_bound = 0; + std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<T>(_function, _data_type); + library->fill_static_values(tensor, get_boundary_values(static_cast<T>(min_bound), 
static_cast<T>(max_bound))); + } + + TensorType compute_target(const TensorShape &shape, TArgs... args) + { + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type)); + ITensorInfo *dst_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type)); + + ITensorInfo *ans_0_info = FunctionType::create_op(sketch, src_info, args...); + if (_fuse) + { + ITensorInfo *ans_1_info = FunctionType::create_op(sketch, ans_0_info, args...); + GpuOutput::create_op(sketch, ans_1_info, dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_0_info, dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + + // Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src)); + + // Run runtime + runtime.run({&t_src, &t_dst}); + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info) + { + // Create reference + SimpleTensor<T> src{shape, _data_type, 1}; + + // Fill reference + fill(src); + + auto tmp = reference::activation_layer<T>(src, act_info); + + if (_fuse) + { + auto dst = reference::activation_layer<T>(tmp, act_info); + return dst; + } + else + { + return tmp; + } + } + +protected: + ActivationLayerInfo::ActivationFunction _function{}; + bool _fuse{false}; + DataType _data_type{}; + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionSigmoidValidationFixture 
+ : public DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, bool fuse, DataType data_type) + { + ActivationLayerInfo act_info{ActivationLayerInfo::ActivationFunction::LOGISTIC}; + DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, fuse, + data_type, act_info); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionTanhValidationFixture + : public DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, bool fuse, DataType data_type) + { + ActivationLayerInfo act_info{ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; + DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, fuse, + data_type, act_info); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h new file mode 100644 index 0000000000..08fffb305b --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/DepthConvertLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2> +class DynamicFusionCastValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy) + { + _target = compute_target(shape, dt_in, dt_out, policy); + _reference = compute_reference(shape, dt_in, dt_out, policy); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, DataType dt_in, DataType dt_out) + { + // Restricting range to avoid inf values + if (dt_out == DataType::F16) + { + constexpr int signed_min = -32000; + constexpr int signed_max = 32000; + constexpr int unsigned_min = 0; + constexpr int unsigned_max = 65000; + + switch (dt_in) + { + case DataType::U8: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: + case DataType::S8: + case DataType::F32: + { + library->fill_tensor_uniform(tensor, i); + break; + } + case DataType::U16: + { + library->fill_tensor_uniform(tensor, i, static_cast<uint16_t>(unsigned_min), + static_cast<uint16_t>(unsigned_max)); + break; + } + case DataType::S16: + { + library->fill_tensor_uniform(tensor, i, 
static_cast<int16_t>(signed_min), + static_cast<int16_t>(signed_max)); + break; + } + case DataType::U32: + { + library->fill_tensor_uniform(tensor, i, static_cast<uint32_t>(unsigned_min), + static_cast<uint32_t>(unsigned_max)); + break; + } + case DataType::S32: + { + library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(signed_min), + static_cast<int32_t>(signed_max)); + break; + } + default: + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } + } + else + { + library->fill_tensor_uniform(tensor, i); + } + } + + // Given input is in nchw format + TensorType + compute_target(const TensorShape &shape, const DataType dt_in, const DataType dt_out, const ConvertPolicy policy) + { + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + // Here, we use DataLayout::NCHW just for the test. However, the optimal data layout to + // be used with dynamic fusion is NHWC + ITensorInfo *src_info = + context.create_tensor_info(TensorInfo(shape, 1, dt_in, DataLayout::NCHW)); // layout is not important + ITensorInfo *dst_info = context.create_tensor_info(); + + CastAttributes attributes; + attributes.convert_policy(policy).data_type(dt_out); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, src_info, attributes); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + + // 
Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src), 0, dt_in, dt_out); + + // Run runtime + runtime.run({&t_src, &t_dst}); + return t_dst; + } + + SimpleTensor<T2> + compute_reference(const TensorShape &shape, const DataType dt_in, const DataType dt_out, const ConvertPolicy policy) + { + // Create reference + SimpleTensor<T1> src{shape, dt_in, 1}; + + // Fill reference + fill(src, 0, dt_in, dt_out); + + return reference::depth_convert<T1, T2>(src, dt_out, policy, 0); + } + + TensorType _target{}; + SimpleTensor<T2> _reference{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h new file mode 100644 index 0000000000..e8f6f83e42 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/validation/reference/ActivationLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionClampValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, ClampAttributes attributes, bool fuse, DataType data_type) + { + // CLAMP is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped. 
+ ActivationLayerInfo act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; + + _fuse = fuse; + _attributes = attributes; + _data_type = data_type; + _target = compute_target(shape, attributes); + _reference = compute_reference(shape, act_info); + } + +protected: + std::vector<T> get_boundary_values(T min, T max) + { + // This function will return a vector filled with the following values that can + // represent two partitions derived from equivalent partitioning. + // * Lower partition: min, min + delta, lower quarter (nominal), center - delta + // * Upper partition: center, center + delta, upper quarter (nominal), max - delta, max + const auto delta = is_data_type_float(_data_type) ? T(0.1f) : T(1); + const auto center_value = (min + max) / 2; + const auto lower_quarter = (min + center_value) / 2; + const auto upper_quarter = (center_value + max) / 2; + + std::vector<T> boundary_values{}; + + // To ensure all the inserted values are within the given range after subtracing/adding delta + auto insert_values = [&boundary_values, &min, &max](const std::initializer_list<T> &new_values) + { + for(auto &v : new_values) + { + if(v >= min && v <= max) + { + boundary_values.emplace_back(v); + } + } + }; + + insert_values({ min, static_cast<T>(min + delta), static_cast<T>(lower_quarter), static_cast<T>(center_value - delta) }); // lower partition + insert_values({ static_cast<T>(center_value), static_cast<T>(center_value + delta), static_cast<T>(upper_quarter), static_cast<T>(max - delta), max }); // upper partition + + return boundary_values; + } + + template <typename U> + void fill(U &&tensor) + { + float min_bound = 0; + float max_bound = 0; + std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<T>(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, _data_type); + library->fill_static_values(tensor, get_boundary_values(static_cast<T>(min_bound), static_cast<T>(max_bound))); + } + + TensorType 
compute_target(const TensorShape &shape, ClampAttributes attributes) + { + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &context }; + + // Create sketch tensors + ITensorInfo* src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type)); + ITensorInfo* dst_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type)); + + ITensorInfo *ans_0_info = FunctionType::create_op(sketch, src_info, attributes); + if(_fuse) + { + ITensorInfo *ans_1_info = FunctionType::create_op(sketch, ans_0_info, attributes); + GpuOutput::create_op(sketch, ans_1_info, dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_0_info, dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + + // Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src)); + + // Run runtime + runtime.run({ &t_src, &t_dst }); + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info) + { + // Create reference + SimpleTensor<T> src{ shape, _data_type, 1, _quantization_info }; + + // Fill reference + fill(src); + + auto dst = reference::activation_layer<T>(src, act_info, _quantization_info); + return dst; + } + +protected: + QuantizationInfo _quantization_info{}; + ClampAttributes _attributes{}; + bool _fuse{ false }; + DataType _data_type{}; + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H diff --git 
a/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h new file mode 100644 index 0000000000..f02aa5e36a --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/Globals.h" +#include "tests/validation/reference/PixelWiseMultiplication.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/* We use a separate test fixture for Multiplication op instead of reusing ElementwiseBinaryFixture to avoid exposing + * the internal enum ElementwiseOp to the public utils/TypePrinters.h as required by the data test case macros + * to print the test data. 
+ */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionMulValidationFixture : public framework::Fixture +{ +public: + void setup(const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops = false) + { + _data_type = data_type; + _is_inplace = is_inplace; + _fuse = fuse_two_ops; + ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops."); + ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet."); + _target = compute_target(shape0, shape1, shape2); + _reference = compute_reference(shape0, shape1, shape2); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + library->fill_tensor_uniform(tensor, i); + } + + TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Fuse first multiplication op + ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type)); + ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + ITensorInfo *dst_info = context.create_tensor_info(); + + ITensorInfo *rhs_info_fuse = nullptr; + + ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info); + + if (_fuse) + { + rhs_info_fuse = context.create_tensor_info(TensorInfo(shape2, 1, _data_type)); + ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, rhs_info_fuse); + GpuOutput::create_op(sketch, ans2_info, dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_info, dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary 
tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_rhs_fuse{}; + TensorType t_dst{}; + + // Initialize user tensors + t_lhs.allocator()->init(*lhs_info); + t_rhs.allocator()->init(*rhs_info); + t_dst.allocator()->init(*dst_info); + if (_fuse) + { + t_rhs_fuse.allocator()->init(*rhs_info_fuse); + } + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + t_dst.allocator()->allocate(); + if (_fuse) + { + t_rhs_fuse.allocator()->allocate(); + } + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + if (_fuse) + { + fill(AccessorType(t_rhs_fuse), 2); + } + + // Run runtime + if (_fuse) + { + runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst}); + } + else + { + runtime.run({&t_lhs, &t_rhs, &t_dst}); + } + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + // Create reference + SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()}; + SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()}; + + // Fill reference + fill(ref_lhs, 0); + fill(ref_rhs, 1); + SimpleTensor<T> ref_dst = reference::pixel_wise_multiplication<T, T, T>( + ref_lhs, ref_rhs, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, _data_type, + QuantizationInfo()); + if (_fuse) + { + fill(ref_rhs_fuse, 2); + SimpleTensor<T> ref_dst_fuse = reference::pixel_wise_multiplication<T, T, T>( + 
ref_dst, ref_rhs_fuse, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, _data_type, + QuantizationInfo()); + return ref_dst_fuse; + } + return ref_dst; + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + bool _is_inplace{false}; + bool _fuse{false}; +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionMulOneOpValidationFixture + : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape0, DataType data_type, bool is_inplace) + { + DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape0, shape0, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionMulBroadcastValidationFixture + : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace) + { + DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape0, shape1, TensorShape(), data_type, is_inplace); + } +}; + +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionMulTwoOpsValidationFixture + : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(const TensorShape &shape0, + const TensorShape &shape1, + const TensorShape &shape2, + DataType data_type, + bool is_inplace, + bool fuse_two_ops) + { + DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H diff 
--git a/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h new file mode 100644 index 0000000000..bde3360940 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/ReshapeAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h" + +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/validation/reference/ReshapeLayer.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionGpuReshapeLayerValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type) + { + _target = compute_target(input_shape, output_shape, data_type); + _reference = compute_reference(input_shape, output_shape, data_type); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + library->fill_tensor_uniform(tensor, i); + } + + TensorType compute_target(TensorShape &input_shape, TensorShape &output_shape, DataType data_type) + { + // Check if indeed the input shape can be reshape to the output one + ARM_COMPUTE_ASSERT(input_shape.total_size() == output_shape.total_size()); + + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + 
ITensorInfo *src_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type)); + ITensorInfo *dst_info = context.create_tensor_info(TensorInfo(output_shape, 1, data_type)); + ReshapeAttributes attributes; + attributes.shape(output_shape); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, src_info, attributes); + GpuOutput::create_op(sketch, ans_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + // Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src), 0); + + // Run runtime + runtime.run({&t_src, &t_dst}); + + return t_dst; + } + + SimpleTensor<T> + compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type) + { + // Create reference + SimpleTensor<T> src{input_shape, data_type}; + + // Fill reference + fill(src, 0); + + return reference::reshape_layer<T>(src, output_shape); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; +}; +/** [ReshapeLayer fixture] **/ +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h new file mode 100644 index 0000000000..711767b66f --- 
/dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h @@ -0,0 +1,272 @@ +/* +* Copyright (c) 2022-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/SimpleTensor.h" +#include "tests/validation/reference/Permute.h" +#include "tests/validation/reference/Scale.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionResizeGenericValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape shape, + DataType data_type, + QuantizationInfo quantization_info, + DataLayout data_layout, + InterpolationPolicy interpolation_policy, + SamplingPolicy sampling_policy, + bool align_corners, + QuantizationInfo output_quantization_info) + { + _shape = shape; + _interpolation_policy = interpolation_policy; + _sampling_policy = sampling_policy; + _data_type = data_type; + _input_quantization_info = quantization_info; + _output_quantization_info = output_quantization_info; + _align_corners = align_corners; + _data_layout = data_layout; + + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion resize supports only NHWC layout + + generate_scale(shape); + + std::mt19937 generator(library->seed()); + std::uniform_int_distribution<uint32_t> distribution_u8(0, 255); 
+ + _target = compute_target(shape); + _reference = compute_reference(shape); + } + +protected: + void generate_scale(const TensorShape &shape) + { + static constexpr float _min_scale{0.25f}; + static constexpr float _max_scale{3.f}; + + constexpr float max_width{8192.0f}; + constexpr float max_height{6384.0f}; + constexpr float min_width{1.f}; + constexpr float min_height{1.f}; + + std::mt19937 generator(library->seed()); + std::uniform_real_distribution<float> distribution_float(_min_scale, _max_scale); + + auto generate = [&](size_t input_size, float min_output, float max_output) -> int + { + const float generated_scale = distribution_float(generator); + const int output_size = static_cast<int>( + utility::clamp(static_cast<float>(input_size) * generated_scale, min_output, max_output)); + return output_size; + }; + + // Input shape is always given in NCHW layout. NHWC is dealt by permute in compute_target() + const int idx_width = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::HEIGHT); + + _output_width = generate(shape[idx_width], min_width, max_width); + _output_height = generate(shape[idx_height], min_height, max_height); + } + + template <typename U> + void fill(U &&tensor) + { + if (tensor.data_type() == DataType::F32) + { + std::uniform_real_distribution<float> distribution(-5.0f, 5.0f); + library->fill(tensor, distribution, 0); + } + else if (tensor.data_type() == DataType::F16) + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-5.0f, 5.0f}; + library->fill(tensor, distribution, 0); + } + else if (is_data_type_quantized(tensor.data_type())) + { + std::uniform_int_distribution<> distribution(0, 100); + library->fill(tensor, distribution, 0); + } + else + { + library->fill_tensor_uniform(tensor, 0); + } + } + + TensorType compute_target(TensorShape shape) + { + // Our test shapes are assumed in NCHW data 
layout, thus the permutation + permute(shape, PermutationVector(2U, 0U, 1U)); + + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + // Create sketch tensors + ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type, _data_layout)); + src_info->set_quantization_info(_input_quantization_info); + ITensorInfo *dst_info = context.create_tensor_info(); + + ResizeAttributes attributes; + attributes.align_corners(_align_corners) + .sampling_policy(_sampling_policy) + .interpolation_policy(_interpolation_policy) + .output_width(_output_width) + .output_height(_output_height); + + ITensorInfo *scale_result_info = FunctionType::create_op(sketch, src_info, attributes); + GpuOutput::create_op(sketch, scale_result_info, dst_info); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_src{}; + TensorType t_dst{}; + + // Initialize user tensors + t_src.allocator()->init(*src_info); + t_dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + t_src.allocator()->allocate(); + t_dst.allocator()->allocate(); + + fill(AccessorType(t_src)); + + // Run runtime + runtime.run({&t_src, &t_dst}); + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape) + { + // Create reference + SimpleTensor<T> src{shape, _data_type, 1, _input_quantization_info}; + + // Reference code is NCHW, so the input shapes are 
NCHW + const int idx_width = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::HEIGHT); + + const float scale_x = static_cast<float>(_output_width) / shape[idx_width]; + const float scale_y = static_cast<float>(_output_height) / shape[idx_height]; + + // Fill reference + fill(src); + + return reference::scale<T>(src, scale_x, scale_y, _interpolation_policy, BorderMode::REPLICATE, + static_cast<T>(0), _sampling_policy, /* ceil_policy_scale */ false, _align_corners, + _output_quantization_info); + } + + TensorType _target{}; + SimpleTensor<T> _reference{}; + TensorShape _shape{}; + InterpolationPolicy _interpolation_policy{}; + SamplingPolicy _sampling_policy{}; + DataType _data_type{}; + DataLayout _data_layout{}; + QuantizationInfo _input_quantization_info{}; + QuantizationInfo _output_quantization_info{}; + bool _align_corners{false}; + int _output_width{0}; + int _output_height{0}; +}; +/** Resize validation fixture without caller-supplied quantization. setup() forwards its arguments to DynamicFusionResizeGenericValidationFixture::setup(), passing a default-constructed QuantizationInfo() as the third and as the last argument (presumably the input and output quantization info; the generic setup() is declared outside this excerpt, confirm there). */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionResizeValidationFixture + : public DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, + DataType data_type, + DataLayout data_layout, + InterpolationPolicy policy, + SamplingPolicy sampling_policy, + bool align_corners) + { + DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape, data_type, QuantizationInfo(), data_layout, policy, sampling_policy, align_corners, + QuantizationInfo()); + } +}; +/** Resize validation fixture taking an explicit QuantizationInfo. setup() forwards its arguments to DynamicFusionResizeGenericValidationFixture::setup(), passing the same quantization_info as the third and as the last argument. NOTE(review): the mixed_layout template parameter is not referenced anywhere inside this class. */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> +class DynamicFusionResizeQuantizedValidationFixture + : public DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, + DataType data_type,
+ QuantizationInfo quantization_info, + DataLayout data_layout, + InterpolationPolicy policy, + SamplingPolicy sampling_policy, + bool align_corners) + { + DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape, data_type, quantization_info, data_layout, policy, sampling_policy, align_corners, + quantization_info); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H diff --git a/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h new file mode 100644 index 0000000000..175d4ff889 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2023-2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H + +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" + +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/SimpleTensor.h" +#include "tests/validation/reference/SoftmaxLayer.h" +#include "tests/validation/Validation.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/** Generic validation fixture for the dynamic fusion softmax operator. setup() produces a reference result via reference::softmax_layer and a target result by building a GpuWorkloadSketch with FunctionType::create_op and running it through ClWorkloadRuntime; the results are stored in _reference and _target. Template parameters: TensorType is the type of the user source/destination tensors of the target run, AccessorType is constructed from the target source tensor to fill it, FunctionType must provide a static create_op(sketch, src_info, dst_info, attributes), and T is the element type of the reference SimpleTensor. */ template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionSoftmaxValidationGenericFixture : public framework::Fixture +{ +public: + /** Compute the reference result first and the target result second, from the same arguments. shape and data_type describe both the source and the destination tensor; beta, axis and is_log are the softmax parameters. Note: axis is received as size_t but compute_reference() and compute_target() take int32_t, so it is implicitly converted at the calls below. */ void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log) + { + _reference = compute_reference(shape, data_type, beta, axis, is_log); + _target = compute_target(shape, data_type, beta, axis, is_log); + } + +protected: + /** Fill a tensor through the test library, selecting the distribution from tensor.data_type(): F32 uses a uniform real distribution over [-10, 10], F16 uses the 16-bit uniform real distribution over [-10, 10], any other non-quantized type uses a uniform integer distribution over [0, 100], and quantized types use library->fill_tensor_uniform. Every branch passes 0 as the last argument (presumably the seed offset; confirm against the library fill API), and this same function fills both the reference and the target input. */ template <typename U> + void fill(U &&tensor) + { + if (tensor.data_type() == DataType::F32) + { + std::uniform_real_distribution<float> distribution(-10.0f, 10.0f); + library->fill(tensor, distribution, 0); + } + else if (tensor.data_type() == DataType::F16) + { + arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-10.0f, 10.0f}; + library->fill(tensor, distribution, 0); + } + else if (!is_data_type_quantized(tensor.data_type())) + { + std::uniform_int_distribution<> distribution(0, 100); + library->fill(tensor, distribution, 0); + }
+ else + { + library->fill_tensor_uniform(tensor, 0); + } + } + /** Build, configure and run the softmax workload and return its destination tensor. Steps, in order: create a GpuWorkloadSketch on a GpuWorkloadContext built from the CLKernelLibrary compile context; set axis, beta and is_log on SoftmaxAttributes; create source and destination tensor infos with the same shape and data type; call FunctionType::create_op; configure a ClWorkloadRuntime from the sketch; init and allocate every auxiliary tensor reported by the runtime; init and allocate the user tensors; fill the source tensor; run the runtime on {&src, &dst}. */ + TensorType compute_target(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log) + { + // Create a new workload sketch + CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + GpuWorkloadContext context = GpuWorkloadContext{&cl_compile_ctx}; + GpuWorkloadSketch sketch{&context}; + + SoftmaxAttributes softmax_attr{}; + softmax_attr.axis(axis).beta(beta).is_log_softmax(is_log); + ITensorInfo *src_info = context.create_tensor_info(shape, 1, data_type); + ITensorInfo *dst_info = context.create_tensor_info(shape, 1, data_type); + FunctionType::create_op(sketch, src_info, dst_info, softmax_attr); + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + // Instead of using ACL allocated memory, the user can choose to import memory into the tensors + for (auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + // Construct user tensors + TensorType src{}; + TensorType dst{}; + + // Initialize user tensors + src.allocator()->init(*src_info); + dst.allocator()->init(*dst_info); + + // Allocate and fill user tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + fill(AccessorType(src)); + + // Run runtime + runtime.run({&src, &dst}); + + return dst; + } + /** Compute the expected result: create a SimpleTensor<T> with the given shape and data type, fill it with fill(), and return reference::softmax_layer<T>(src, beta, axis, is_log). */ + SimpleTensor<T> + compute_reference(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log) + { + // Create reference + SimpleTensor<T> src{shape, data_type, 1}; + + // Fill reference + fill(src); + + return reference::softmax_layer<T>(src, beta, axis, is_log); + } + + TensorType _target{}; /**< Result of compute_target(), assigned in setup() */ + SimpleTensor<T>
_reference{}; /**< Result of compute_reference(), assigned in setup() */ +}; +/** Softmax validation fixture exposing the same setup() signature as the generic fixture; setup() forwards shape, data_type, beta, axis and is_log unchanged to DynamicFusionSoftmaxValidationGenericFixture::setup(). */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionSoftmaxValidationFixture + : public DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log) + { + DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup( + shape, data_type, beta, axis, is_log); + } +}; + +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H |