diff options
Diffstat (limited to 'tests/validation/reference')
40 files changed, 1595 insertions, 320 deletions
diff --git a/tests/validation/reference/ActivationLayer.cpp b/tests/validation/reference/ActivationLayer.cpp index 664b969125..2172362bdd 100644 --- a/tests/validation/reference/ActivationLayer.cpp +++ b/tests/validation/reference/ActivationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020,2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "ActivationLayer.h" #include "arm_compute/core/Types.h" + #include "tests/validation/Helpers.h" namespace arm_compute @@ -40,7 +41,7 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo ARM_COMPUTE_UNUSED(oq_info); // Create reference - SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 }; + SimpleTensor<T> dst{src.shape(), src.data_type(), 1}; // Compute reference const T a(info.a()); @@ -48,7 +49,7 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo #if defined(_OPENMP) #pragma omp parallel for #endif /* _OPENMP */ - for(int i = 0; i < src.num_elements(); ++i) + for (int i = 0; i < src.num_elements(); ++i) { dst[i] = activate_float<T>(src[i], a, b, info.activation()); } @@ -57,7 +58,8 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo } template <> -SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) +SimpleTensor<uint8_t> +activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) { const QuantizationInfo dst_qinfo = oq_info.empty() ? 
src.quantization_info() : oq_info; @@ -68,7 +70,8 @@ SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src } template <> -SimpleTensor<int8_t> activation_layer<int8_t>(const SimpleTensor<int8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) +SimpleTensor<int8_t> +activation_layer<int8_t>(const SimpleTensor<int8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) { const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info; @@ -79,7 +82,8 @@ SimpleTensor<int8_t> activation_layer<int8_t>(const SimpleTensor<int8_t> &src, A } template <> -SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) +SimpleTensor<int16_t> +activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info) { const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info; @@ -88,9 +92,14 @@ SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src SimpleTensor<int16_t> dst = convert_to_symmetric<int16_t>(dst_tmp, dst_qinfo); return dst; } -template SimpleTensor<int32_t> activation_layer(const SimpleTensor<int32_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); -template SimpleTensor<float> activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); -template SimpleTensor<half> activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); +template SimpleTensor<int32_t> +activation_layer(const SimpleTensor<int32_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); +template SimpleTensor<float> +activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); +template SimpleTensor<half> +activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo 
info, const QuantizationInfo &oq_info); +template SimpleTensor<bfloat16> +activation_layer(const SimpleTensor<bfloat16> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ActivationLayer.h b/tests/validation/reference/ActivationLayer.h index 8aad1af63e..7f896bd696 100644 --- a/tests/validation/reference/ActivationLayer.h +++ b/tests/validation/reference/ActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020,2022,2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_ACTIVATION_LAYER_H -#define ARM_COMPUTE_TEST_ACTIVATION_LAYER_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H +#define ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -40,7 +40,7 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a { T ret; - switch(activation) + switch (activation) { case ActivationLayerInfo::ActivationFunction::ABS: ret = std::abs(x); @@ -61,13 +61,13 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a ret = std::min<T>(a, std::max<T>(b, x)); break; case ActivationLayerInfo::ActivationFunction::LEAKY_RELU: - ret = (x > 0) ? x : a * x; + ret = x > static_cast<T>(0) ? x : static_cast<T>(a * x); break; case ActivationLayerInfo::ActivationFunction::SOFT_RELU: - ret = std::log(static_cast<T>(1) + std::exp(x)); + ret = std::log(static_cast<T>(1) + std::exp(static_cast<double>(x))); break; case ActivationLayerInfo::ActivationFunction::ELU: - ret = (x > 0) ? x : a * (std::exp(x) - static_cast<T>(1)); + ret = x > static_cast<T>(0) ? 
x : static_cast<T>(a * (std::exp(x) - static_cast<T>(1))); break; case ActivationLayerInfo::ActivationFunction::SQRT: ret = std::sqrt(x); @@ -82,7 +82,14 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a ret = x; break; case ActivationLayerInfo::ActivationFunction::HARD_SWISH: - ret = x * ((std::min(std::max(static_cast<T>(x + 3), static_cast<T>(0.0f)), static_cast<T>(6.0f))) * 0.166666667f); + ret = x * ((std::min(std::max(static_cast<T>(x + 3), static_cast<T>(0.0f)), static_cast<T>(6.0f))) * + 0.166666667f); + break; + case ActivationLayerInfo::ActivationFunction::SWISH: + ret = static_cast<T>(x) / (static_cast<T>(1) + std::exp(-a * x)); + break; + case ActivationLayerInfo::ActivationFunction::GELU: + ret = x * 0.5f * (1 + erf(x / std::sqrt(2.0f))); break; default: ARM_COMPUTE_ERROR("Unsupported activation function"); @@ -93,9 +100,11 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a } template <typename T> -SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info = QuantizationInfo()); +SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, + ActivationLayerInfo info, + const QuantizationInfo &oq_info = QuantizationInfo()); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_ACTIVATION_LAYER_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H diff --git a/tests/validation/reference/BatchToSpaceLayer.cpp b/tests/validation/reference/BatchToSpaceLayer.cpp index 404ee73cac..63d121f59b 100644 --- a/tests/validation/reference/BatchToSpaceLayer.cpp +++ b/tests/validation/reference/BatchToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,8 +23,10 @@ */ #include "BatchToSpaceLayer.h" +#include "arm_compute/core/Validate.h" #include "tests/validation/Helpers.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" namespace arm_compute { namespace test @@ -35,32 +37,37 @@ namespace reference { // Batch to Space template <typename T> -SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape) +SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape) { - ARM_COMPUTE_ERROR_ON(block_shape[0] <= 0); - ARM_COMPUTE_ERROR_ON(block_shape[1] <= 0); - SimpleTensor<T> result(dst_shape, src.data_type()); + ARM_COMPUTE_ERROR_ON(block_shape[0] < 1); + ARM_COMPUTE_ERROR_ON(block_shape[1] < 1); + const auto expected_dst_shape = misc::shape_calculator::compute_batch_to_space_shape(DataLayout::NCHW, src.shape(), block_shape[0], block_shape[1], crop_info); + ARM_COMPUTE_ERROR_ON(arm_compute::detail::have_different_dimensions(expected_dst_shape, dst_shape, 0)); + ARM_COMPUTE_UNUSED(expected_dst_shape); - int in_pos = 0; - const auto width_in = static_cast<int>(src.shape()[0]); - const auto height_in = static_cast<int>(src.shape()[1]); - const auto z_in = static_cast<int>(src.shape()[2]); - const auto batch_in = static_cast<int>(src.shape()[3]); + SimpleTensor<T> result(dst_shape, src.data_type()); + int out_pos = 0; + const auto width_out = static_cast<int>(dst_shape[0]); + const auto height_out = static_cast<int>(dst_shape[1]); + const auto z_out = static_cast<int>(dst_shape[2]); + const auto batch_out = static_cast<int>(dst_shape[3]); - for(int batch = 0; batch < batch_in; ++batch) + for(int batch = 0; batch < batch_out; ++batch) { - for(int z = 0; z < z_in; ++z) + for(int z = 0; z < z_out; ++z) { - for(int y = 0; y < height_in; ++y) + for(int y = 0; y < height_out; ++y) { - for(int x = 0; x < width_in; 
++x) + for(int x = 0; x < width_out; ++x) { - const int r = src.shape()[3] / (block_shape[0] * block_shape[1]); - const int out_x = (block_shape[0] * x + (batch / r) % block_shape[0]); - const int out_y = (block_shape[1] * y + (batch / r) / block_shape[0]); - const int out_pos = out_x + dst_shape[0] * out_y + z * dst_shape[0] * dst_shape[1] + (batch % r) * dst_shape[0] * dst_shape[1] * dst_shape[2]; - result[out_pos] = src[in_pos]; - ++in_pos; + const int x_c = x + crop_info.left; + const int y_c = y + crop_info.top; + const int in_batch = batch + ((x_c % block_shape[0]) + (y_c % block_shape[1]) * (block_shape[0])) * dst_shape[3]; + const int in_x = x_c / block_shape[0]; + const int in_y = y_c / block_shape[1]; + const int in_pos = in_x + src.shape()[0] * in_y + z * src.shape()[0] * src.shape()[1] + in_batch * src.shape()[0] * src.shape()[1] * src.shape()[2]; + result[out_pos] = src[in_pos]; + ++out_pos; } } } @@ -68,8 +75,8 @@ SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<in return result; } -template SimpleTensor<float> batch_to_space(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape); -template SimpleTensor<half> batch_to_space(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape); +template SimpleTensor<float> batch_to_space(const SimpleTensor<float> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape); +template SimpleTensor<half> batch_to_space(const SimpleTensor<half> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/BatchToSpaceLayer.h b/tests/validation/reference/BatchToSpaceLayer.h index 52556cb53f..a37bfc3373 100644 --- a/tests/validation/reference/BatchToSpaceLayer.h +++ 
b/tests/validation/reference/BatchToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H #define ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H +#include "arm_compute/core/Types.h" #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -36,7 +37,7 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape); +SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp new file mode 100644 index 0000000000..e4010a507a --- /dev/null +++ b/tests/validation/reference/Conv3D.cpp @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Conv3D.h" + +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "support/AclRequires.h" +#include "tests/validation/reference/UtilsQuantizedAsymm.h" + +// Source/Destination Tensor shape indices (N D H W C) +constexpr unsigned int batch_dim = 4u; +constexpr unsigned int depth_dim = 3u; +constexpr unsigned int height_dim = 2u; +constexpr unsigned int width_dim = 1u; +constexpr unsigned int channel_dim = 0u; + +// Weight tensor shape indices (D H W Cin Cout) +constexpr unsigned int weights_depth_dim = 4u; +constexpr unsigned int weights_height_dim = 3u; +constexpr unsigned int weights_width_dim = 2u; +constexpr unsigned int weights_CHin_dim = 1u; +constexpr unsigned int weights_CHout_dim = 0u; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +namespace +{ +inline bool is_valid_pixel(int i, int min, int max) +{ + return (i >= min && i < max); +} + +// Evaluate the weights against an element in a given tensor. 
+template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 > +T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch, + int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) +{ + ARM_COMPUTE_UNUSED(oq_info); + + const unsigned int weights_width = weights.shape()[weights_width_dim]; + const unsigned int weights_height = weights.shape()[weights_height_dim]; + const unsigned int weights_depth = weights.shape()[weights_depth_dim]; + + const unsigned int src_channels = src.shape()[channel_dim]; + const unsigned int src_width = src.shape()[width_dim]; + const unsigned int src_height = src.shape()[height_dim]; + const unsigned int src_depth = src.shape()[depth_dim]; + + T total(0); + for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) + { + const int idx_z = z_start + dilation.depth * weight_d; + for(unsigned int weight_y = 0; weight_y < weights_height; ++weight_y) + { + const int idx_y = y_start + dilation.height * weight_y; + for(unsigned int weight_x = 0; weight_x < weights_width; ++weight_x) + { + const int idx_x = x_start + dilation.width * weight_x; + + //Check if the point is within padding + const bool is_x_valid = is_valid_pixel(idx_x, 0, src_width); + const bool is_y_valid = is_valid_pixel(idx_y, 0, src_height); + const bool is_z_valid = is_valid_pixel(idx_z, 0, src_depth); + const bool is_invalid_pixel = !(is_x_valid && is_y_valid && is_z_valid); + if(is_invalid_pixel) + { + continue; + } + + for(unsigned int ch_in = 0; ch_in < src_channels; ++ch_in) + { + const T *in_ptr = src.data(); + const T *w_ptr = weights.data(); + + const int in_offset = coord2index(src.shape(), Coordinates{ ch_in, idx_x, idx_y, idx_z, batch }); + const int weight_offset = coord2index(weights.shape(), Coordinates{ ch_out, ch_in, weight_x, weight_y, 
weight_d }); + T input_value = in_ptr[in_offset]; + T weight_value = w_ptr[weight_offset]; + total += (input_value * weight_value); + } + } + } + } + + const TB *b_ptr = bias.data(); + TB bias_value = b_ptr[ch_out]; + + return total + bias_value; +} + +template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) > +T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch, + int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info) +{ + const unsigned int weights_width = weights.shape()[weights_width_dim]; + const unsigned int weights_height = weights.shape()[weights_height_dim]; + const unsigned int weights_depth = weights.shape()[weights_depth_dim]; + + const unsigned int src_channels = src.shape()[channel_dim]; + const unsigned int src_width = src.shape()[width_dim]; + const unsigned int src_height = src.shape()[height_dim]; + const unsigned int src_depth = src.shape()[depth_dim]; + + const UniformQuantizationInfo iq_info = src.quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); + + const int input_offset = -iq_info.offset; + const float input_scale = iq_info.scale; + int weights_offset = -wq_info.offset; + float weights_scale = wq_info.scale; + const int output_offset = oq_info.offset; + const float output_scale = oq_info.scale; + + int output_multiplier = 0; + int output_shift = 0; + const float multiplier = input_scale * weights_scale / output_scale; + arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); + + int32_t total(0); + for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d) + { + const int idx_z = z_start + dilation.depth * weight_d; + for(unsigned int weight_y = 0; weight_y < weights_height; ++weight_y) + { + const int idx_y = y_start + dilation.height 
* weight_y; + for(unsigned int weight_x = 0; weight_x < weights_width; ++weight_x) + { + const int idx_x = x_start + dilation.width * weight_x; + + //Check if the point is within padding + const bool is_x_valid = is_valid_pixel(idx_x, 0, src_width); + const bool is_y_valid = is_valid_pixel(idx_y, 0, src_height); + const bool is_z_valid = is_valid_pixel(idx_z, 0, src_depth); + const bool is_invalid_pixel = !(is_x_valid && is_y_valid && is_z_valid); + if(is_invalid_pixel) + { + continue; + } + + for(unsigned int ch_in = 0; ch_in < src_channels; ++ch_in) + { + const T *in_ptr = src.data(); + const T *w_ptr = weights.data(); + + const int in_offset = coord2index(src.shape(), Coordinates{ ch_in, idx_x, idx_y, idx_z, batch }); + const int weight_offset = coord2index(weights.shape(), Coordinates{ ch_out, ch_in, weight_x, weight_y, weight_d }); + T input_value = in_ptr[in_offset]; + T weight_value = w_ptr[weight_offset]; + total += ((input_value + input_offset) * (weight_value + weights_offset)); + } + } + } + } + + const TB *b_ptr = bias.data(); + TB bias_value = b_ptr[ch_out]; + + total += bias_value; + + return validation::quantize_down_scale_by_fixedpoint(total, output_multiplier, output_shift, output_offset, + std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max()); +} +} // namespace + +template <typename T, typename TB> +SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info) +{ + // Compute reference + const unsigned int batch_size = src.shape()[batch_dim]; + const unsigned int dst_width = dst.shape()[width_dim]; + const unsigned int dst_height = dst.shape()[height_dim]; + const unsigned int dst_depth = dst.shape()[depth_dim]; + const unsigned int src_channels = src.shape()[channel_dim]; + const unsigned int weights_out_ch = weights.shape()[weights_CHout_dim]; + const unsigned int dst_channels = dst.shape()[channel_dim]; + const size_t pad_left = 
conv3d_info.padding.left; + const size_t pad_top = conv3d_info.padding.top; + const size_t pad_front = conv3d_info.padding.front; + const size_t stride_x = conv3d_info.stride.x(); + const size_t stride_y = conv3d_info.stride.y(); + const size_t stride_z = conv3d_info.stride.z(); + + const TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_conv3d_shape(src.shape(), weights.shape(), conv3d_info); + + ARM_COMPUTE_UNUSED(src_channels, weights_out_ch, dst_channels, dst_shape, weights_CHin_dim); + // Number of batches of source and destination tensors must match. + ARM_COMPUTE_ERROR_ON(src.shape()[batch_dim] != dst.shape()[batch_dim]); + // Input channels in the source and weights must match. + ARM_COMPUTE_ERROR_ON(src_channels != weights.shape()[weights_CHin_dim]); + // Weight channels in the destination and weights must match. + ARM_COMPUTE_ERROR_ON(weights_out_ch != dst_channels); + // Bias must match the number of destination channels. + ARM_COMPUTE_ERROR_ON(bias.shape()[0] != dst_channels); + // Compare given dst tensor shape with expected shape. 
+ ARM_COMPUTE_ERROR_ON(dst.shape() != dst_shape); + + for(unsigned int batch = 0; batch < batch_size; ++batch) + { + for(unsigned int z_out = 0; z_out < dst_depth; ++z_out) + { + const int z_start = (z_out * stride_z) - pad_front; + for(unsigned int y_out = 0; y_out < dst_height; ++y_out) + { + const int y_start = (y_out * stride_y) - pad_top; + for(unsigned int x_out = 0; x_out < dst_width; ++x_out) + { + const int x_start = (x_out * stride_x) - pad_left; + for(unsigned int ch_out = 0; ch_out < dst_channels; ++ch_out) + { + T *out_ptr = dst.data(); + + const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch }); + out_ptr[out_offset] = calculate_conv3d<T, TB>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform()); + } + } + } + } + } + return dst; +} + +template SimpleTensor<float> conv3d(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst, + const Conv3dInfo &conv3d_info); +template SimpleTensor<half> conv3d(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst, + const Conv3dInfo &conv3d_info); +template SimpleTensor<uint8_t> conv3d(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst, + const Conv3dInfo &conv3d_info); +template SimpleTensor<int8_t> conv3d(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst, + const Conv3dInfo &conv3d_info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/Conv3D.h b/tests/validation/reference/Conv3D.h new file mode 100644 index 0000000000..e3674f4bfb --- /dev/null +++ b/tests/validation/reference/Conv3D.h @@ -0,0 +1,47 @@ +/* + * Copyright 
(c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_CONV3D_LAYER_H +#define ARM_COMPUTE_TEST_CONV3D_LAYER_H + +#include "Utils.h" +#include "arm_compute/runtime/FunctionDescriptors.h" +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template <typename T, typename TB> +SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, + const Conv3dInfo &conv3d_info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_CONV3D_LAYER_H */ diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h index 1666e3857b..b67e88e839 100644 --- a/tests/validation/reference/Convolution3d.h +++ b/tests/validation/reference/Convolution3d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H #include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "support/Requires.h" +#include "support/AclRequires.h" #include "tests/validation/Helpers.h" #include "tests/validation/reference/UtilsQuantizedAsymm.h" diff --git a/tests/validation/reference/DFT.cpp b/tests/validation/reference/DFT.cpp index fd126c7d73..2b03c270ac 100644 --- a/tests/validation/reference/DFT.cpp +++ b/tests/validation/reference/DFT.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2020, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -400,10 +400,10 @@ SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, auto padded_src = pad_layer(src, padding_in); // Flip weights - std::vector<uint32_t> axis_v = { 0, 1 }; - SimpleTensor<uint32_t> axis{ TensorShape(2U), DataType::U32 }; + std::vector<uint32_t> axis_v = { 0, 1 }; + SimpleTensor<int32_t> axis{ TensorShape(2U), DataType::S32 }; std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data()); - auto flipped_w = reverse(w, axis); + auto flipped_w = reverse(w, axis, /* use_inverted_axis */ false); // Pad weights to have the same size as input const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } }; diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp index 94c719ade7..3f88897f8e 100644 --- a/tests/validation/reference/DepthConvertLayer.cpp +++ b/tests/validation/reference/DepthConvertLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -165,7 +165,7 @@ template SimpleTensor<half> depth_convert(const SimpleTensor<int32_t> &src, Data template SimpleTensor<float> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); // BFLOAT16 -template SimpleTensor<float> depth_convert(const SimpleTensor<bfloat16> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<bfloat16> depth_convert(const SimpleTensor<bfloat16> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); // F16 template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); @@ -186,6 +186,25 @@ template SimpleTensor<int32_t> depth_convert(const SimpleTensor<float> &src, Dat template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); template SimpleTensor<bfloat16> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +// S64 +template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, 
uint32_t shift); +template SimpleTensor<float> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// U64 +template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<float> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp index 64a89aa6a0..67d69c2c38 100644 --- a/tests/validation/reference/DequantizationLayer.cpp +++ b/tests/validation/reference/DequantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -59,6 +59,12 @@ TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo, DataType dt) ARM_COMPUTE_UNUSED(dt); return static_cast<TOut>(dequantize_qsymm16(val, qinfo)); } +template <typename TOut> +TOut dequantize(int32_t val, const UniformQuantizationInfo qinfo, DataType dt) +{ + ARM_COMPUTE_UNUSED(dt); + return static_cast<TOut>(dequantize_s32(val, qinfo)); +} } // namespace template <typename TOut, typename TIn> SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src) @@ -115,6 +121,7 @@ template SimpleTensor<half> dequantization_layer(const SimpleTensor<int8_t> &src template SimpleTensor<float> dequantization_layer(const SimpleTensor<int8_t> &src); template SimpleTensor<half> dequantization_layer(const SimpleTensor<int16_t> &src); template SimpleTensor<float> dequantization_layer(const SimpleTensor<int16_t> &src); +template SimpleTensor<float> dequantization_layer(const SimpleTensor<int32_t> &src); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp index f22c84e153..edbbab8600 100644 --- a/tests/validation/reference/ElementwiseOperations.cpp +++ b/tests/validation/reference/ElementwiseOperations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -74,15 +74,6 @@ T arithm_op(ArithmeticOperation op, T src1, T src2, ConvertPolicy convert_policy case ArithmeticOperation::DIV: { val = (static_cast<intermediate_type>(src1) / static_cast<intermediate_type>(src2)); - if(std::is_integral<T>::value) - { - // Implement flooring division - val = (src2 == 0) ? 
0 : val; - if(static_cast<int32_t>(src1) % static_cast<int32_t>(src2) != 0 && ((src1 < 0) != (src2 < 0))) - { - --val; - } - } break; } case ArithmeticOperation::POWER: diff --git a/tests/validation/reference/ElementwiseUnary.cpp b/tests/validation/reference/ElementwiseUnary.cpp index 5333b53c15..558f9d24fc 100644 --- a/tests/validation/reference/ElementwiseUnary.cpp +++ b/tests/validation/reference/ElementwiseUnary.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,8 @@ * SOFTWARE. */ #include "ElementwiseUnary.h" - +#include "tests/validation/Helpers.h" +#include "utils/TypePrinter.h" namespace arm_compute { namespace test @@ -32,10 +33,8 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op) +SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, SimpleTensor<T> &dst, ElementWiseUnary op) { - SimpleTensor<T> dst(src.shape(), src.data_type()); - for(int i = 0; i < src.num_elements(); ++i) { switch(op) @@ -65,13 +64,107 @@ SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary o ARM_COMPUTE_ERROR("Not implemented"); } } + return dst; +} +template <> +SimpleTensor<int8_t> elementwise_unary(const SimpleTensor<int8_t> &src, SimpleTensor<int8_t> &dst, ElementWiseUnary op) +{ + if(dst.data_type() == DataType::QASYMM8_SIGNED) + { + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp(src.shape(), DataType::F32); + for(int i = 0; i < src.num_elements(); ++i) + { + switch(op) + { + case ElementWiseUnary::RSQRT: + if(src_tmp[i] != 0) + { + dst_tmp[i] = 1.f / std::sqrt(src_tmp[i]); + } + else + { + // rsqrt(0) give 'inf' so set to the maximum in int8: 127 + dst_tmp[i] = (127.0f - dst.quantization_info().uniform().offset) * dst.quantization_info().uniform().scale ; + } + break; + + case 
ElementWiseUnary::LOG: + if(src_tmp[i] != 0) + { + dst_tmp[i] = std::log(src_tmp[i]); + } + else + { + dst_tmp[i] = (-128.0f - dst.quantization_info().uniform().offset) * dst.quantization_info().uniform().scale ; + } + break; + + default: + elementwise_unary(src_tmp, dst_tmp, op); + break; + } + } + dst = convert_to_asymmetric<int8_t>(dst_tmp, dst.quantization_info()); + } + else + { + ARM_COMPUTE_ERROR("Not implemented"); + } + return dst; +} +template <> +SimpleTensor<uint8_t> elementwise_unary(const SimpleTensor<uint8_t> &src, SimpleTensor<uint8_t> &dst, ElementWiseUnary op) +{ + if(dst.data_type() == DataType::QASYMM8) + { + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp(src.shape(), DataType::F32); + for(int i = 0; i < src.num_elements(); ++i) + { + switch(op) + { + case ElementWiseUnary::RSQRT: + if(src_tmp[i] != 0) + { + dst_tmp[i] = 1.f / std::sqrt(src_tmp[i]); + } + else + { + // rsqrt(0) give 'inf' so set to the maximum in uint8: 255 + dst_tmp[i] = (255.0f - dst.quantization_info().uniform().offset)* dst.quantization_info().uniform().scale; + } + break; + case ElementWiseUnary::LOG: + if(src_tmp[i] != 0) + { + dst_tmp[i] = std::log(src_tmp[i]); + } + else + { + dst_tmp[i] = -dst.quantization_info().uniform().offset * dst.quantization_info().uniform().scale; + } + break; + + default: + elementwise_unary(src_tmp, dst_tmp, op); + break; + } + } + dst = convert_to_asymmetric<uint8_t>(dst_tmp, dst.quantization_info()); + } + else + { + ARM_COMPUTE_ERROR("Not implemented"); + } return dst; } -template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, ElementWiseUnary op); -template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, ElementWiseUnary op); -template SimpleTensor<int32_t> elementwise_unary(const SimpleTensor<int32_t> &src, ElementWiseUnary op); +template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, SimpleTensor<float> &dst, ElementWiseUnary op); 
+template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, SimpleTensor<half> &dst, ElementWiseUnary op); +template SimpleTensor<int32_t> elementwise_unary(const SimpleTensor<int32_t> &src, SimpleTensor<int32_t> &dst, ElementWiseUnary op); + } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ElementwiseUnary.h b/tests/validation/reference/ElementwiseUnary.h index be4a229a5b..ae7a49bce4 100644 --- a/tests/validation/reference/ElementwiseUnary.h +++ b/tests/validation/reference/ElementwiseUnary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,7 +35,7 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op); +SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, SimpleTensor<T> &dst, ElementWiseUnary op); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp index 21333958f8..af30e9ee54 100644 --- a/tests/validation/reference/FullyConnectedLayer.cpp +++ b/tests/validation/reference/FullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -123,7 +123,7 @@ SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTe // Create reference SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, out_quant_info }; - // Sanity checks + // Health checks const int num_batch_dimensions = std::max(0, static_cast<int>(dst_shape.num_dimensions()) - 1); const int num_input_dimensions = src.shape().num_dimensions() - num_batch_dimensions; const unsigned int linear_input_size = src.shape().total_size_lower(num_input_dimensions); diff --git a/tests/validation/reference/GEMM.cpp b/tests/validation/reference/GEMM.cpp index 6b3aa390f0..d513343796 100644 --- a/tests/validation/reference/GEMM.cpp +++ b/tests/validation/reference/GEMM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" +#include "tests/validation/reference/ArithmeticOperations.h" namespace arm_compute { @@ -35,10 +36,11 @@ namespace validation namespace reference { template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type> -SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta) +SimpleTensor<T> +gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta) { // Create reference - SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 }; + SimpleTensor<T> dst{c.shape(), c.data_type(), 1}; // Compute reference const int M = a.shape().y(); @@ -50,30 +52,47 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S const int a_stride_z = K * M; const int a_stride_w = K * M * D; - const int b_stride_z = b.shape().num_dimensions() > 2 ? 
N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions - const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions + const int b_stride_z = + b.shape().num_dimensions() > 2 + ? N * K + : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions + int b_stride_w = + b.shape().num_dimensions() > 3 + ? K * N * D + : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions + + // Note: There are 3 gemm types: batched-gemm, multi-gemm, and batched of multi-gemms. The third dimension of tensor b is overloaded when tensor b has exactly 3 dimensions: + // it can be either number of batches or multis. Batched-GEMM computation is detected only when the third dimension of "a" and "c" tensors is 1 and the number of dimensions is 4 + const bool is_batched_gemm = b.shape().num_dimensions() == 3 && a.shape().num_dimensions() == 4 && + c.shape().num_dimensions() == 4 && a.shape()[2] == 1 && c.shape()[2] == 1; + + // Batched-GEMM + if (is_batched_gemm) + { + b_stride_w = b_stride_z; + } const int c_stride_z = N * M; const int c_stride_w = N * M * D; -#if defined(_OPENMP) && !( defined(__arm__) && defined(__ANDROID__)) +#if defined(_OPENMP) && !(defined(__arm__) && defined(__ANDROID__)) #pragma omp parallel for collapse(2) #endif /* _OPENMP */ - for(int w = 0; w < W; ++w) + for (int w = 0; w < W; ++w) { - for(int depth = 0; depth < D; ++depth) + for (int depth = 0; depth < D; ++depth) { const int base_addr_a = depth * a_stride_z + w * a_stride_w; const int base_addr_b = depth * b_stride_z + w * b_stride_w; const int base_addr_c = depth * c_stride_z + w * c_stride_w; - for(int row = 0; row < M; ++row) + for (int row = 0; row < M; ++row) { - for(int col = 0; col < N; ++col) + for (int col = 0; col < N; ++col) { T acc(0); - for(int k = 0; k < 
K; ++k) + for (int k = 0; k < K; ++k) { acc += a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N]; } @@ -89,11 +108,12 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S } template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type> -SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta) +SimpleTensor<T> gemm_mixed_precision( + const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta) { // GEMM mixed-precision combines F32 accumulators with F16 multiplications // Create reference - SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 }; + SimpleTensor<T> dst{c.shape(), c.data_type(), 1}; // Compute reference const int M = a.shape().y(); @@ -105,36 +125,54 @@ SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTenso const int a_stride_z = K * M; const int a_stride_w = K * M * D; - const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions - const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions + const int b_stride_z = + b.shape().num_dimensions() > 2 + ? N * K + : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions + int b_stride_w = + b.shape().num_dimensions() > 3 + ? K * N * D + : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions + + // Note: There are 3 gemm types: batched-gemm, multi-gemm, and batched of multi-gemms. The third dimension of tensor b is overloaded when tensor b has exactly 3 dimensions: + // it can be either number of batches or multis. 
Batched-GEMM computation is detected only when the third dimension of "a" and "c" tensors is 1 and the number of dimensions is 4 + const bool is_batched_gemm = b.shape().num_dimensions() == 3 && a.shape().num_dimensions() == 4 && + c.shape().num_dimensions() == 4 && a.shape()[2] == 1 && c.shape()[2] == 1; + + // Batched-GEMM + if (is_batched_gemm) + { + b_stride_w = b_stride_z; + } const int c_stride_z = N * M; const int c_stride_w = N * M * D; -#if defined(_OPENMP) && !( defined(__arm__) && defined(__ANDROID__)) +#if defined(_OPENMP) && !(defined(__arm__) && defined(__ANDROID__)) #pragma omp parallel for collapse(2) #endif /* _OPENMP */ - for(int w = 0; w < W; ++w) + for (int w = 0; w < W; ++w) { - for(int depth = 0; depth < D; ++depth) + for (int depth = 0; depth < D; ++depth) { const int base_addr_a = depth * a_stride_z + w * a_stride_w; const int base_addr_b = depth * b_stride_z + w * b_stride_w; const int base_addr_c = depth * c_stride_z + w * c_stride_w; - for(int row = 0; row < M; ++row) + for (int row = 0; row < M; ++row) { - for(int col = 0; col < N; ++col) + for (int col = 0; col < N; ++col) { float acc(0); - for(int k = 0; k < K; ++k) + for (int k = 0; k < K; ++k) { acc += static_cast<float>(a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N]); } // Finalize the result: alpha * A * B + beta * C - dst[base_addr_c + col + row * N] = static_cast<T>(alpha * acc + beta * c[base_addr_c + col + row * N]); + dst[base_addr_c + col + row * N] = + static_cast<T>(alpha * acc + beta * c[base_addr_c + col + row * N]); } } } @@ -143,8 +181,21 @@ SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTenso return dst; } +template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type> +void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst) +{ + // Compute reference + SimpleTensor<T> dst_gemm = gemm(a, b, c, alpha, beta); + 
reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE); +} + +template SimpleTensor<bfloat16> gemm(const SimpleTensor<bfloat16> &a, const SimpleTensor<bfloat16> &b, const SimpleTensor<bfloat16> &c, float alpha, float beta); template SimpleTensor<float> gemm(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta); template SimpleTensor<half> gemm(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta); + +template void gemm_accumulate(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta, SimpleTensor<float> &dst); +template void gemm_accumulate(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta, SimpleTensor<half> &dst); + template SimpleTensor<half> gemm_mixed_precision(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta); } // namespace reference } // namespace validation diff --git a/tests/validation/reference/GEMM.h b/tests/validation/reference/GEMM.h index 5feaeda584..1b97570122 100644 --- a/tests/validation/reference/GEMM.h +++ b/tests/validation/reference/GEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_GEMM_H -#define ARM_COMPUTE_TEST_GEMM_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMM_H +#define ACL_TESTS_VALIDATION_REFERENCE_GEMM_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -41,8 +41,11 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0> SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta); +template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0> +void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst); + } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMM_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMM_H diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp index 1615b51e73..30c577d850 100644 --- a/tests/validation/reference/GEMMLowp.cpp +++ b/tests/validation/reference/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "GEMMLowp.h" #include "arm_compute/core/Types.h" +#include "tests/validation/reference/ArithmeticOperations.h" #include "tests/validation/reference/UtilsQuantizedAsymm.h" #include "support/ToolchainSupport.h" @@ -230,6 +231,13 @@ SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, c return c; } +template <typename T_out, typename T_in, typename T_in_1> +void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T_out> &dst) +{ + SimpleTensor<T_out> dst_gemm = gemmlowp_matrix_multiply_core<T_out, T_in, T_in_1>(a, b, shape_c, a_offset, b_offset); + reference::arithmetic_operation<T_out>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE); +} + // used to validate assembly kernels which don't know anything about offsets template <typename T1, typename T2, typename T3> SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c) @@ -336,6 +344,8 @@ template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<in std::vector<int32_t> result_shift, int32_t min, int32_t max); template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst); +template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t 
a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst); template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c); template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c); template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c); diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h index 99015d71fb..6e471fdad1 100644 --- a/tests/validation/reference/GEMMLowp.h +++ b/tests/validation/reference/GEMMLowp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_GEMMLOWP_H -#define ARM_COMPUTE_TEST_GEMMLOWP_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H +#define ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -38,6 +38,9 @@ namespace reference template <typename T1, typename T2, typename T3> SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset); +template <typename T1, typename T2, typename T3> +void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T1> &dst_); + template <typename T1, typename T2, typename T3 = T2> SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c); @@ -71,4 +74,4 @@ SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn> } // namespace validation } // 
namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWP_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H diff --git a/tests/validation/reference/Gather.cpp b/tests/validation/reference/Gather.cpp index 93ac09cf95..c90c04f8cc 100644 --- a/tests/validation/reference/Gather.cpp +++ b/tests/validation/reference/Gather.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,27 +39,56 @@ namespace reference template <typename T> SimpleTensor<T> gather(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis) { - const auto *indices_ptr = static_cast<const uint32_t *>(indices.data()); const TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_gather_shape(src.shape(), indices.shape(), actual_axis); SimpleTensor<T> dst(dst_shape, src.data_type()); + const auto src_ptr = static_cast<const T *>(src.data()); + const auto indices_ptr = static_cast<const uint32_t *>(indices.data()); + const auto dst_ptr = static_cast<T *>(dst.data()); + + const uint32_t index_limit = src.shape()[actual_axis]; + Window win; win.use_tensor_dimensions(dst_shape); - execute_window_loop(win, [&](const Coordinates & id) - { - Coordinates offset; - for(unsigned int dim = 0; dim < id.num_dimensions(); ++dim) + + execute_window_loop(win, [&](const Coordinates &dst_coords) { + const auto dst_addr = coords2index(dst.shape(), dst_coords); + + // Calculate the coordinates of the index value. + Coordinates idx_coords; + + for(size_t i = 0; i < indices.shape().num_dimensions(); ++i) { - if(dim == actual_axis) + idx_coords.set(i, dst_coords[i + actual_axis]); + } + + const auto index = indices_ptr[coords2index(indices.shape(), idx_coords)]; + + if(index < index_limit) + { + // Calculate the coordinates of the source data. 
+ Coordinates src_coords; + + for(size_t i = 0; i < actual_axis; ++i) { - offset.set(dim, indices_ptr[id[dim]]); + src_coords.set(i, dst_coords[i]); } - else + + src_coords.set(actual_axis, index); + + for(size_t i = actual_axis + 1; i < src.shape().num_dimensions(); ++i) { - offset.set(dim, id[dim]); + src_coords.set(i, dst_coords[i + indices.shape().num_dimensions() - 1]); } + + // Copy the data. + const auto src_addr = coords2index(src.shape(), src_coords); + dst_ptr[dst_addr] = src_ptr[src_addr]; + } + else + { + dst_ptr[dst_addr] = 0; } - *reinterpret_cast<T *>(dst(id)) = *reinterpret_cast<const T *>(src(offset)); }); return dst; @@ -72,4 +101,4 @@ template SimpleTensor<uint8_t> gather(const SimpleTensor<uint8_t> &src, const Si } // namespace reference } // namespace validation } // namespace test -} // namespace arm_compute
\ No newline at end of file +} // namespace arm_compute diff --git a/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp b/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp new file mode 100644 index 0000000000..7500560c91 --- /dev/null +++ b/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "IndirectConv2dAddressPrecalculation.h" + +#include "arm_compute/core/Types.h" + +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +SimpleTensor<int32_t> indirect_conv2d_addr_precalculation(const TensorShape &shape_conv_src, const TensorShape &shape_conv_wei, const TensorShape &shape_conv_dst, const TensorShape &shape_dst, + const PadStrideInfo &conv_info) +{ + SimpleTensor<int32_t> out{ shape_dst, DataType::S32 }; + + constexpr unsigned int width_idx = 1; + constexpr unsigned int heigh_idx = 2; + + const int src_conv_width = static_cast<int32_t>(shape_conv_src[width_idx]); // NHWC + const int src_conv_height = static_cast<int32_t>(shape_conv_src[heigh_idx]); // NHWC + const int dst_conv_width = static_cast<int32_t>(shape_conv_dst[width_idx]); // NHWC + const int wei_conv_width = static_cast<int32_t>(shape_conv_wei[width_idx]); // NHWC + const int wei_conv_height = static_cast<int32_t>(shape_conv_wei[heigh_idx]); // NHWC + const int dst_width = static_cast<int32_t>(shape_dst[0]); + const int dst_height = static_cast<int32_t>(shape_dst[1]); + const int dst_batch = static_cast<int32_t>(shape_dst[2]); + const int ks = wei_conv_width * wei_conv_height; + const int stride_x = static_cast<int32_t>(conv_info.stride().first); + const int stride_y = static_cast<int32_t>(conv_info.stride().second); + const int pad_left = static_cast<int32_t>(conv_info.pad_left()); + const int pad_top = static_cast<int32_t>(conv_info.pad_top()); + + const int m0 = dst_width / ks; + + for(int z = 0; z < dst_batch; ++z) + { + for(int y = 0; y < dst_height; ++y) + { + const int mout = y * m0; + for(int ki = 0; ki < ks; ++ki) + { + const int xk = ki % wei_conv_width; + const int yk = ki / wei_conv_width; + for(int mi = 0; mi < m0; ++mi) + { + int xi = ((mout + mi) % dst_conv_width) * stride_x; + int yi = ((mout + mi) / dst_conv_width) * stride_y; + xi -= pad_left; + yi -= pad_top; + const int x_s = 
xi + xk; + const int y_s = yi + yk; + int my = x_s + y_s * src_conv_width; + my = my + z * src_conv_width * src_conv_height; + my = x_s >= 0 ? my : -1; + my = x_s < src_conv_width ? my : -1; + my = y_s >= 0 ? my : -1; + my = y_s < src_conv_height ? my : -1; + + const unsigned int addr_out = mi + ki * m0 + y * (dst_width) + z * (dst_width * dst_height); + out[addr_out] = my; + } + } + } + } + + return out; +} +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute
\ No newline at end of file diff --git a/tests/validation/reference/IndirectConv2dAddressPrecalculation.h b/tests/validation/reference/IndirectConv2dAddressPrecalculation.h new file mode 100644 index 0000000000..f4a90dfd9f --- /dev/null +++ b/tests/validation/reference/IndirectConv2dAddressPrecalculation.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H +#define ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H + +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +SimpleTensor<int32_t> indirect_conv2d_addr_precalculation(const TensorShape &shape_conv_src, const TensorShape &shape_conv_wei, const TensorShape &shape_conv_out, const TensorShape &shape_out, + const PadStrideInfo &conv_info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H */
\ No newline at end of file diff --git a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp index 0a23fa19bb..a7c8a784d9 100644 --- a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp +++ b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -63,6 +63,15 @@ SimpleTensor<T> mean_std_normalization_layer(const SimpleTensor<T> &src, float e return dst; } +template <> +SimpleTensor<uint8_t> mean_std_normalization_layer(const SimpleTensor<uint8_t> &src, float epsilon) +{ + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp = mean_std_normalization_layer<float>(src_tmp, epsilon); + SimpleTensor<uint8_t> dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info()); + return dst; +} + template SimpleTensor<float> mean_std_normalization_layer(const SimpleTensor<float> &src, float epsilon); template SimpleTensor<half> mean_std_normalization_layer(const SimpleTensor<half> &src, float epsilon); } // namespace reference diff --git a/tests/validation/reference/Permute.cpp b/tests/validation/reference/Permute.cpp index 6f122b1bf5..7aa3011d8f 100644 --- a/tests/validation/reference/Permute.cpp +++ b/tests/validation/reference/Permute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019,2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "Permute.h" #include "arm_compute/core/Types.h" + #include "tests/validation/Helpers.h" namespace arm_compute @@ -42,11 +43,11 @@ SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm) permute(dst_shape, perm); // Create reference - SimpleTensor<T> dst{ dst_shape, src.data_type(), src.num_channels(), src.quantization_info() }; + SimpleTensor<T> dst{dst_shape, src.data_type(), src.num_channels(), src.quantization_info()}; // Compute reference const uint32_t num_elements = src.num_elements(); - for(uint32_t i = 0; i < num_elements; ++i) + for (uint32_t i = 0; i < num_elements; ++i) { const Coordinates src_coords = index2coord(src.shape(), i); Coordinates dst_coords = src_coords; @@ -58,13 +59,14 @@ SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm) return dst; } -template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, PermutationVector perm); -template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm); -template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm); +template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, PermutationVector perm); +template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm); +template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm); template SimpleTensor<uint16_t> permute(const SimpleTensor<uint16_t> &src, PermutationVector perm); template SimpleTensor<uint32_t> permute(const SimpleTensor<uint32_t> &src, PermutationVector perm); -template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm); -template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm); +template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm); +template SimpleTensor<half> permute(const 
SimpleTensor<half> &src, PermutationVector perm); +template SimpleTensor<bfloat16> permute(const SimpleTensor<bfloat16> &src, PermutationVector perm); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Pooling3dLayer.cpp b/tests/validation/reference/Pooling3dLayer.cpp new file mode 100644 index 0000000000..2e8f3a0b92 --- /dev/null +++ b/tests/validation/reference/Pooling3dLayer.cpp @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "Pooling3dLayer.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +using namespace arm_compute::misc::shape_calculator; + +template <typename T> +SimpleTensor<T> pooling_3d_layer_internal(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, SimpleTensor<uint32_t> *indices) +{ + TensorShape pooled_shape = compute_pool3d_shape(src.shape(), pool3d_info); + SimpleTensor<T> dst{ pooled_shape, src.data_type(), 1 }; + + if(indices != nullptr) + { + *indices = SimpleTensor<uint32_t> { pooled_shape, DataType::U32, 1 }; + } + + const int idx_channel = 0; + const int idx_width = 1; + const int idx_height = 2; + const int idx_depth = 3; + const int idx_batch = 4; + + const int pool_size_width = pool3d_info.is_global_pooling ? src.shape()[idx_width] : pool3d_info.pool_size.width; + const int pool_size_height = pool3d_info.is_global_pooling ? src.shape()[idx_height] : pool3d_info.pool_size.height; + const int pool_size_depth = pool3d_info.is_global_pooling ? 
src.shape()[idx_depth] : pool3d_info.pool_size.depth; + + const int pool_stride_width = static_cast<int>(pool3d_info.stride.width); + const int pool_stride_height = static_cast<int>(pool3d_info.stride.height); + const int pool_stride_depth = static_cast<int>(pool3d_info.stride.depth); + + const int pad_left = static_cast<int>(pool3d_info.padding.left); + const int pad_top = static_cast<int>(pool3d_info.padding.top); + const int pad_front = static_cast<int>(pool3d_info.padding.front); + + const int pad_right = static_cast<int>(pool3d_info.padding.right); + const int pad_bottom = static_cast<int>(pool3d_info.padding.bottom); + const int pad_back = static_cast<int>(pool3d_info.padding.back); + + const int num_channels = static_cast<int>(src.shape()[idx_channel]); + const int num_batches = static_cast<int>(src.shape()[idx_batch]); + + ARM_COMPUTE_ERROR_ON(num_channels != static_cast<int>(dst.shape()[idx_channel])); + ARM_COMPUTE_ERROR_ON(num_batches != static_cast<int>(dst.shape()[idx_batch])); + + const int w_src = static_cast<int>(src.shape()[idx_width]); + const int h_src = static_cast<int>(src.shape()[idx_height]); + const int d_src = static_cast<int>(src.shape()[idx_depth]); + const int w_dst = static_cast<int>(dst.shape()[idx_width]); + const int h_dst = static_cast<int>(dst.shape()[idx_height]); + const int d_dst = static_cast<int>(dst.shape()[idx_depth]); + + const bool exclude_padding = pool3d_info.exclude_padding; + + const int height_stride_src = num_channels * w_src; + const int depth_stride_src = height_stride_src * h_src; + const int batch_stride_src = depth_stride_src * d_src; + const int height_stride_dst = num_channels * w_dst; + const int depth_stride_dst = height_stride_dst * h_dst; + const int batch_stride_dst = depth_stride_dst * d_dst; + + for(int b = 0; b < num_batches; ++b) + { + const int batch_offset_dst = b * batch_stride_dst; + const int batch_offset_src = b * batch_stride_src; + for(int c = 0; c < num_channels; ++c) + { + for(int d = 0; d < 
d_dst; ++d) + { + const int depth_offset_dst = d * depth_stride_dst; + for(int h = 0; h < h_dst; ++h) + { + const int height_offset_dst = h * height_stride_dst; + for(int w = 0; w < w_dst; ++w) + { + int wstart = w * pool_stride_width - pad_left; + int hstart = h * pool_stride_height - pad_top; + int dstart = d * pool_stride_depth - pad_front; + int wend = std::min(wstart + pool_size_width, w_src + pad_right); + int hend = std::min(hstart + pool_size_height, h_src + pad_bottom); + int dend = std::min(dstart + pool_size_depth, d_src + pad_back); + + // this may not be equal to pool_w * pool_h * pool_d because of + // DimensionRoundingType choice (CEIL) + int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart); + + // limit [start, end) to [0, w_src) + wstart = std::max(wstart, 0); + hstart = std::max(hstart, 0); + dstart = std::max(dstart, 0); + wend = std::min(wend, w_src); + hend = std::min(hend, h_src); + dend = std::min(dend, d_src); + + auto max_val = -std::numeric_limits<T>::infinity(); + int max_index{ 0 }; + T avg_val = static_cast<T>(0.f); + T l2_val = static_cast<T>(0.f); + + if(exclude_padding) + { + pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart); + } + + for(int z = dstart; z < dend; ++z) + { + const int depth_offset_src = z * depth_stride_src; + for(int y = hstart; y < hend; ++y) + { + const int height_offset_src = y * height_stride_src; + for(int x = wstart; x < wend; ++x) + { + const auto val = static_cast<T>( + src[batch_offset_src + depth_offset_src + height_offset_src + x * num_channels + c]); + if(val > max_val) + { + max_val = val; + max_index = coord2index(src.shape(), Coordinates(c, x, y, z, 0)); + } + + avg_val += val; + l2_val += val * val; + } + } + } + + avg_val /= pool_size; + l2_val = static_cast<T>(std::sqrt(l2_val / pool_size)); + + int dst_index = batch_offset_dst + depth_offset_dst + height_offset_dst + w * num_channels + c; + switch(pool3d_info.pool_type) + { + case PoolingType::MAX: + dst[dst_index] = 
static_cast<T>(max_val); + break; + case PoolingType::AVG: + dst[dst_index] = static_cast<T>(avg_val); + break; + case PoolingType::L2: + dst[dst_index] = static_cast<T>(l2_val); + break; + default: + ARM_COMPUTE_ERROR("Pooling Type should be either MAX, AVG or L2"); + } + + if(indices != nullptr) + { + (*indices)[dst_index] = max_index; + } + } + } + } + } + } + + return dst; +} + +template SimpleTensor<float> pooling_3d_layer(const SimpleTensor<float> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices); +template SimpleTensor<half> pooling_3d_layer(const SimpleTensor<half> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices); + +template <typename T> +SimpleTensor<T> pooling_3d_layer(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices) +{ + ARM_COMPUTE_UNUSED(output_qinfo); + return pooling_3d_layer_internal<T>(src, pool3d_info, indices); +} + +template <> +SimpleTensor<int8_t> pooling_3d_layer<int8_t>(const SimpleTensor<int8_t> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices) +{ + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp = pooling_3d_layer_internal<float>(src_tmp, pool3d_info, indices); + return convert_to_asymmetric<int8_t>(dst_tmp, output_qinfo); +} + +template <> +SimpleTensor<uint8_t> pooling_3d_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices) +{ + SimpleTensor<float> src_tmp = convert_from_asymmetric(src); + SimpleTensor<float> dst_tmp = pooling_3d_layer_internal<float>(src_tmp, pool3d_info, indices); + return convert_to_asymmetric<uint8_t>(dst_tmp, output_qinfo); +} + +} // namespace reference +} // namespace validation +} // 
namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/Pooling3dLayer.h b/tests/validation/reference/Pooling3dLayer.h new file mode 100644 index 0000000000..481a0d3024 --- /dev/null +++ b/tests/validation/reference/Pooling3dLayer.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_POOL3D_LAYER_H +#define ARM_COMPUTE_TEST_POOL3D_LAYER_H + +#include "Utils.h" +#include "arm_compute/core/Types.h" +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template <typename T> +SimpleTensor<T> pooling_3d_layer_internal(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, SimpleTensor<uint32_t> *indices = nullptr); + +template <typename T> +SimpleTensor<T> pooling_3d_layer(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo = QuantizationInfo(), + SimpleTensor<uint32_t> *indices = nullptr); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_POOL3D_LAYER_H */ diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp index 5f4edfe49c..bf7bd0c1df 100644 --- a/tests/validation/reference/PoolingLayer.cpp +++ b/tests/validation/reference/PoolingLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -40,7 +40,6 @@ using namespace arm_compute::misc::shape_calculator; template <typename T, typename ACC_T, typename std::enable_if<is_floating_point<T>::value, int>::type> SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const PoolingLayerInfo &info, SimpleTensor<uint32_t> *indices, DataLayout data_layout) { - ARM_COMPUTE_ERROR_ON(info.is_global_pooling && (src.shape().x() != src.shape().y())); // Create reference SimpleTensor<T> dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info), src.data_type(), 1 }; auto pooled_shape = compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info); @@ -84,20 +83,28 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling { int wstart = w * pool_stride_x - pad_left; int hstart = h * pool_stride_y - pad_top; + + // Used to calculate kernel indices + int kh_start = std::max(0, -hstart); + int kw_start = std::max(0, -wstart); + int max_ker_index{ 0 }; + int wend = std::min(wstart + pool_size_x, w_src); int hend = std::min(hstart + pool_size_y, h_src); wstart = std::max(wstart, 0); hstart = std::max(hstart, 0); - auto max_val = std::numeric_limits<ACC_T>::lowest(); + auto max_val = info.use_inf_as_limit ? 
-std::numeric_limits<ACC_T>::infinity() : std::numeric_limits<ACC_T>::lowest(); int max_index{ 0 }; - for(int y = hstart; y < hend; ++y) + + for(int y = hstart, kh = kh_start; y < hend; ++y, ++kh) { - for(int x = wstart; x < wend; ++x) + for(int x = wstart, kw = kw_start; x < wend; ++x, ++kw) { const auto val = static_cast<ACC_T>(src[b * z_src * h_src * w_src + r * h_src * w_src + y * w_src + x]); if(val > max_val) { - max_val = val; + max_val = val; + max_ker_index = pool_size_x * (kh) + (kw); if(data_layout == DataLayout::NCHW) { max_index = coord2index(src.shape(), Coordinates(x, y, r, 0)); @@ -113,7 +120,7 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling dst[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = static_cast<T>(max_val); if(indices) { - (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = max_index; + (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = (info.use_kernel_indices) ? max_ker_index : max_index; } } } diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp index 27665375c3..ad7ba7ac43 100644 --- a/tests/validation/reference/QuantizationLayer.cpp +++ b/tests/validation/reference/QuantizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp index ffb79f86c5..c189bc2d47 100644 --- a/tests/validation/reference/ReductionOperation.cpp +++ b/tests/validation/reference/ReductionOperation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,6 @@ * SOFTWARE. 
*/ #include "ReductionOperation.h" - #include "tests/validation/Helpers.h" #include <algorithm> @@ -39,7 +38,7 @@ namespace reference namespace { template <typename T, typename OT> -OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride) +OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride, RoundingPolicy policy) { using type = typename std::remove_cv<OT>::type; T res; @@ -99,7 +98,14 @@ OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, in } if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0) { - int_res /= reduce_elements; + // Only use rounding in aarch64 to be consistent with kernel +#ifdef __aarch64__ + // Divide in float format, then rounded to nearest and implicitly cast back to int + int_res = round(static_cast<float>(int_res) / static_cast<float>(reduce_elements), policy); +#else // defined(__aarch64__) + ARM_COMPUTE_UNUSED(policy); + int_res /= reduce_elements; // Legacy compatibility +#endif // __aarch64 } res = static_cast<type>(int_res); } @@ -175,12 +181,12 @@ OT reduce_operation_arg_min_max(const T *ptr, int reduce_elements, ReductionOper } // namespace template <typename T, typename OT> -SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op) +SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, + DataType output_type, RoundingPolicy policy) { // Create reference - const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX); - DataType output_data_type = is_arg_min_max ? 
DataType::S32 : src.data_type(); - SimpleTensor<OT> dst{ dst_shape, output_data_type, 1, src.quantization_info() }; + const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX); + SimpleTensor<OT> dst{ dst_shape, output_type, 1, src.quantization_info() }; const unsigned int src_width = src.shape().x(); const unsigned int src_height = src.shape().y(); const unsigned int src_depth = src.shape().z(); @@ -197,7 +203,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T const T *src_row_ptr = src.data() + du * reduce_elems; dst[du] = is_arg_min_max ? reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, 1) : - reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1); + reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1, policy); } } break; @@ -213,7 +219,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T const T *src_row_ptr = src.data() + in_offset; dst[out_offset] = is_arg_min_max ? reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width) : - reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width); + reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width, policy); } } } @@ -232,7 +238,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T const T *src_row_ptr = src.data() + in_offset; dst[out_offset] = is_arg_min_max ? reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height) : - reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height); + reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height, policy); } } } @@ -254,7 +260,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T const T *src_row_ptr = src.data() + in_offset; dst[out_offset] = is_arg_min_max ? 
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth) : - reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth); + reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth, policy); } } } @@ -269,74 +275,89 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T } template <typename T, typename OT> -SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output) +SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, + DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy) { ARM_COMPUTE_UNUSED(quantization_info_output); - return compute_reduction_operation<T, OT>(src, dst_shape, axis, op); + return compute_reduction_operation<T, OT>(src, dst_shape, axis, op, output_type, policy); } template <> -SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output) +SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, + DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy) { if(src.data_type() == DataType::QASYMM8) { // If the operation is MEAN_SUM, we can directly use the uint8 implementation without taking into account scale and offset if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output) { - return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op); + return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, output_type, policy); } else { SimpleTensor<float> src_f = 
convert_from_asymmetric(src); - SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op); + SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op, output_type); return convert_to_asymmetric<uint8_t>(dst_f, quantization_info_output); } } else { - return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op); + return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, output_type, policy); } } template <> -SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output) +SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, + ReductionOperation op, DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy) { if(src.data_type() == DataType::QASYMM8_SIGNED) { // If the operation is MEAN_SUM, we can directly use the int8 implementation without taking into account scale and offset if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output) { - return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op); + return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, output_type, policy); } else { SimpleTensor<float> src_f = convert_from_asymmetric(src); - SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op); + SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op, output_type); return convert_to_asymmetric<int8_t>(dst_f, quantization_info_output); } } else { - return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op); + return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, output_type, policy); } } template SimpleTensor<float> 
reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, QuantizationInfo quantization_info_output = QuantizationInfo(), + RoundingPolicy policy = RoundingPolicy::TO_ZERO); + template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); + template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, 
ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); + DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); +template SimpleTensor<int64_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, + DataType output_type = DataType::S32, QuantizationInfo quantization_info_output = QuantizationInfo(), + RoundingPolicy policy = RoundingPolicy::TO_ZERO); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ReductionOperation.h b/tests/validation/reference/ReductionOperation.h index 9c9e721b29..fb2e7a7093 100644 --- a/tests/validation/reference/ReductionOperation.h +++ b/tests/validation/reference/ReductionOperation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_TEST_REDUCTION_OPERATION_H #define ARM_COMPUTE_TEST_REDUCTION_OPERATION_H +#include "arm_compute/core/Rounding.h" #include "tests/SimpleTensor.h" #include "tests/validation/Helpers.h" @@ -36,8 +37,8 @@ namespace validation namespace reference { template <typename T, typename OT> -SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, - QuantizationInfo quantization_info_output = QuantizationInfo()); +SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, DataType output_type = DataType::S32, + QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Remap.cpp b/tests/validation/reference/Remap.cpp deleted file mode 100644 index 33c5a7de68..0000000000 --- a/tests/validation/reference/Remap.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2017-2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "Remap.h" - -#include "Utils.h" -#include "tests/validation/Helpers.h" - -#include <algorithm> -#include <array> - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template <typename T> -SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode, - T constant_border_value) -{ - ARM_COMPUTE_ERROR_ON_MSG(border_mode == BorderMode::REPLICATE, "BorderMode not supported"); - SimpleTensor<T> out(in.shape(), in.data_type()); - ARM_COMPUTE_ERROR_ON(out.num_elements() != map_x.num_elements()); - const int width = in.shape().x(); - const int height = in.shape().y(); - const uint32_t num_elements = out.num_elements(); - for(uint32_t idx = 0; idx < num_elements; idx++) - { - const Coordinates id_out = index2coord(out.shape(), idx); - valid_mask[idx] = 1; - Coordinates src_idx = id_out; // need to setup all coordinates and not just xy - if((0 <= map_y[idx]) && (map_y[idx] < height) && (0 <= map_x[idx]) && (map_x[idx] < width)) - { - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - { - src_idx.set(0, static_cast<int>(std::floor(map_x[idx]))); - src_idx.set(1, static_cast<int>(std::floor(map_y[idx]))); - out[idx] = in[coord2index(in.shape(), src_idx)]; - break; - } - case InterpolationPolicy::BILINEAR: - { - (valid_bilinear_policy(map_x[idx], map_y[idx], width, height, border_mode)) ? 
- out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value) : - valid_mask[idx] = 0; - break; - } - case InterpolationPolicy::AREA: - default: - ARM_COMPUTE_ERROR("Interpolation not supported"); - break; - } - } - else - { - if(border_mode == BorderMode::UNDEFINED) - { - valid_mask[idx] = 0; - } - else - { - switch(policy) - { - case InterpolationPolicy::NEAREST_NEIGHBOR: - out[idx] = constant_border_value; - break; - case InterpolationPolicy::BILINEAR: - out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value); - break; - case InterpolationPolicy::AREA: - default: - break; - } - } - } - } - - return out; -} - -template SimpleTensor<uint8_t> remap(const SimpleTensor<uint8_t> &src, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<uint8_t> &valid_mask, InterpolationPolicy policy, - BorderMode border_mode, - uint8_t constant_border_value); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/reference/Reorder.cpp b/tests/validation/reference/Reorder.cpp new file mode 100644 index 0000000000..8abb372596 --- /dev/null +++ b/tests/validation/reference/Reorder.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Reorder.h" +#include "src/core/NEON/kernels/arm_gemm/utils.hpp" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ + +/* + * Generic transform. + * + * Assuming the untransposed case, this works by first reading <BlockBy> + * consecutive values from the first input row. This same number of values + * are then read from the next <IntBy-1> rows. Now return to the first + * input row and repeat. + * + * Need to cope with the work requested in either dimension not actually + * being a multiple of the block sizes. + */ +template <unsigned int tIntBy, unsigned int BlockBy, bool Transposed, size_t TOutSize, size_t TInSize, typename d_type, arm_gemm::VLType vlt> +struct Transform_ref +{ + template <typename TOut, typename TIn> + static void Transform(TOut &out, const TIn in, const int stride, + const int y0, const int ymax, const int x0, const int xmax) + { + // NOTE: This code is disabled to avoid the call to get_vector_length(), so templated transforms will not be + // correct for SVE. This is not an issue as we have specializations for all SVE cases. + // For SVE cases we multiply the interleave factor by the vector length. + // const unsigned int IntBy = tIntBy * (vlt == VLType::SVE ? 
get_vector_length<TOut>() / BlockBy : 1); + const unsigned int IntBy = tIntBy; + int out_index = 0; + + const int n_whole_y_blocks = (ymax - y0) / IntBy; + const int y_remainders = (ymax - y0) % IntBy; + const int n_y_blocks = n_whole_y_blocks + (y_remainders ? 1 : 0); + + const int n_whole_x_blocks = (xmax - x0) / BlockBy; + const int x_remainders = (xmax - x0) % BlockBy; + const int n_x_blocks = n_whole_x_blocks + (x_remainders ? 1 : 0); + + // "Y" loop: advance down the rows of the source IntBy rows at a time. + // Set up fill_rows to show the number rows to copy from, and blank_rows + // for the number of blank rows to add. + for(int y_block = 0; y_block < n_y_blocks; y_block++) + { + const int fill_rows = (y_block < n_whole_y_blocks) ? IntBy : y_remainders; + const int blank_rows = IntBy - fill_rows; + + const int y_base = y0 + (y_block * IntBy); + + // So now advance along this block of rows, BlockBy columns at a time. + for(int x_block = 0; x_block < n_x_blocks; x_block++) + { + const int fill_cols = (x_block < n_whole_x_blocks) ? BlockBy : x_remainders; + const int blank_cols = BlockBy - fill_cols; + + const int x_base = x0 + (x_block * BlockBy); + + for(int row = 0; row < fill_rows; row++) + { + for(int col = 0; col < fill_cols; col++) + { + // In-range copy. If it's transposed, we reverse the sense of rows and columns here. + if(Transposed) + { + out[out_index] = in[(x_base + col) * stride + y_base + row]; + out_index++; + } + else + { + out[out_index] = in[(y_base + row) * stride + x_base + col]; + out_index++; + } + } + // "col" tail - row is in range but column is out of range. + for(int col = 0; col < blank_cols; col++) + { + out[out_index] = 0; + out_index++; + } + } + // "row" tail - row is out of range so fill with zeros always. 
+ const d_type zeroval = 0; + const int pads = blank_rows * (fill_cols + blank_cols); + + for(int i = 0; i < pads; i++) + { + out[out_index] = zeroval; + } + + out_index += pads; + } + } + } +}; + +template <typename T> +SimpleTensor<T> reorder_layer(const SimpleTensor<T> &src, const TensorShape &output_shape, WeightFormat output_wf) +{ + SimpleTensor<T> dst{ output_shape, src.data_type() }; + const int cols = src.shape()[0]; + const int rows = src.shape()[1]; + + switch(output_wf) + { + case WeightFormat::OHWIo4: + { + Transform_ref<4, 1, true, sizeof(float), sizeof(float), float, arm_gemm::VLType::None>::Transform<SimpleTensor<T> &, SimpleTensor<T>>(dst, src, rows, 0, rows, 0, cols); + break; + } + case WeightFormat::OHWIo8: + { + Transform_ref<8, 1, true, sizeof(float), sizeof(float), float, arm_gemm::VLType::None>::Transform<SimpleTensor<T> &, SimpleTensor<T>>(dst, src, rows, 0, rows, 0, cols); + break; + } + default: + break; + } + + return dst; +} + +template SimpleTensor<float> reorder_layer(const SimpleTensor<float> &src, const TensorShape &output_shape, WeightFormat output_wf); + +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/Remap.h b/tests/validation/reference/Reorder.h index 0726f75965..94ee5078f8 100644 --- a/tests/validation/reference/Remap.h +++ b/tests/validation/reference/Reorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,10 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_REMAP_H -#define ARM_COMPUTE_TEST_REMAP_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_REORDER +#define ACL_TESTS_VALIDATION_REFERENCE_REORDER #include "tests/SimpleTensor.h" +#include "tests/Types.h" namespace arm_compute { @@ -35,10 +36,9 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode, - T constant_border_value = 0); +SimpleTensor<T> reorder_layer(const SimpleTensor<T> &src, const TensorShape &output_shape, WeightFormat output_wf); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_REMAP_H */ +#endif /* ACL_TESTS_VALIDATION_REFERENCE_REORDER */ diff --git a/tests/validation/reference/ReshapeLayer.cpp b/tests/validation/reference/ReshapeLayer.cpp index daea001be6..30a58dd65b 100644 --- a/tests/validation/reference/ReshapeLayer.cpp +++ b/tests/validation/reference/ReshapeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017,2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -44,14 +44,15 @@ SimpleTensor<T> reshape_layer(const SimpleTensor<T> &src, const TensorShape &out return dst; } -template SimpleTensor<uint8_t> reshape_layer(const SimpleTensor<uint8_t> &src, const TensorShape &output_shape); -template SimpleTensor<int8_t> reshape_layer(const SimpleTensor<int8_t> &src, const TensorShape &output_shape); +template SimpleTensor<uint8_t> reshape_layer(const SimpleTensor<uint8_t> &src, const TensorShape &output_shape); +template SimpleTensor<int8_t> reshape_layer(const SimpleTensor<int8_t> &src, const TensorShape &output_shape); template SimpleTensor<uint16_t> reshape_layer(const SimpleTensor<uint16_t> &src, const TensorShape &output_shape); -template SimpleTensor<int16_t> reshape_layer(const SimpleTensor<int16_t> &src, const TensorShape &output_shape); +template SimpleTensor<int16_t> reshape_layer(const SimpleTensor<int16_t> &src, const TensorShape &output_shape); template SimpleTensor<uint32_t> reshape_layer(const SimpleTensor<uint32_t> &src, const TensorShape &output_shape); -template SimpleTensor<int32_t> reshape_layer(const SimpleTensor<int32_t> &src, const TensorShape &output_shape); -template SimpleTensor<half> reshape_layer(const SimpleTensor<half> &src, const TensorShape &output_shape); -template SimpleTensor<float> reshape_layer(const SimpleTensor<float> &src, const TensorShape &output_shape); +template SimpleTensor<int32_t> reshape_layer(const SimpleTensor<int32_t> &src, const TensorShape &output_shape); +template SimpleTensor<half> reshape_layer(const SimpleTensor<half> &src, const TensorShape &output_shape); +template SimpleTensor<float> reshape_layer(const SimpleTensor<float> &src, const TensorShape &output_shape); +template SimpleTensor<bfloat16> reshape_layer(const SimpleTensor<bfloat16> &src, const TensorShape &output_shape); /** [ReshapeLayer] **/ } // namespace reference } // namespace validation diff --git a/tests/validation/reference/Reverse.cpp 
b/tests/validation/reference/Reverse.cpp index c6c4614278..7924f900d1 100644 --- a/tests/validation/reference/Reverse.cpp +++ b/tests/validation/reference/Reverse.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -35,8 +35,9 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis) +SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis) { + ARM_COMPUTE_ERROR_ON(src.shape().num_dimensions() > 4); ARM_COMPUTE_ERROR_ON(axis.shape().num_dimensions() > 1); ARM_COMPUTE_ERROR_ON(axis.shape().x() > 4); @@ -48,10 +49,32 @@ SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> const unsigned int depth = src.shape()[2]; const unsigned int batches = src.shape()[3]; + const int rank = src.shape().num_dimensions(); + std::array<bool, 4> to_reverse = { { false, false, false, false } }; for(int i = 0; i < axis.num_elements(); ++i) { - to_reverse[axis[i]] = true; + int axis_i = axis[i]; + + // The values of axis tensor must be between [-rank, rank-1]. + if((axis_i < -rank) || (axis_i >= rank)) + { + ARM_COMPUTE_ERROR("the values of the axis tensor must be within [-rank, rank-1]."); + } + + // In case of negative axis value i.e targeted axis(i) = rank + axis(i) + if(axis_i < 0) + { + axis_i = rank + axis_i; + } + + // Reverse ACL axis indices convention i.e. 
(inverted)axis = (tensor_rank - 1) - axis + if(use_inverted_axis) + { + axis_i = (rank - 1) - axis_i; + } + + to_reverse[axis_i] = true; } const uint32_t num_elements = src.num_elements(); @@ -73,9 +96,9 @@ SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> return dst; } -template SimpleTensor<uint8_t> reverse(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint32_t> &axis); -template SimpleTensor<half> reverse(const SimpleTensor<half> &src, const SimpleTensor<uint32_t> &axis); -template SimpleTensor<float> reverse(const SimpleTensor<float> &src, const SimpleTensor<uint32_t> &axis); +template SimpleTensor<uint8_t> reverse(const SimpleTensor<uint8_t> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis); +template SimpleTensor<half> reverse(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis); +template SimpleTensor<float> reverse(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Reverse.h b/tests/validation/reference/Reverse.h index 4a28da7270..30926b05a5 100644 --- a/tests/validation/reference/Reverse.h +++ b/tests/validation/reference/Reverse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_REVERSE_H -#define ARM_COMPUTE_TEST_REVERSE_H +#ifndef ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H +#define ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H #include "tests/SimpleTensor.h" @@ -35,9 +35,9 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis); +SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis = false); } // namespace reference } // namespace validation } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_REVERSE_H */ +#endif // ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp index 71e98fd776..2f429cb29b 100644 --- a/tests/validation/reference/Scale.cpp +++ b/tests/validation/reference/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #include "Scale.h" #include "Utils.h" -#include "arm_compute/core/utils/misc/Utility.h" #include "src/core/utils/ScaleUtils.h" #include "support/Rounding.h" @@ -183,14 +182,15 @@ SimpleTensor<T> scale_core(const SimpleTensor<T> &in, float scale_x, float scale template <typename T> SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners) + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info) { + ARM_COMPUTE_UNUSED(output_quantization_info); return scale_core<T>(src, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy, ceil_policy_scale, align_corners); } template <> SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners) + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info) { SimpleTensor<uint8_t> dst; if(src.quantization_info().uniform().scale != 0.f) @@ -198,7 +198,7 @@ SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, flo SimpleTensor<float> src_tmp = convert_from_asymmetric(src); float constant_border_value_f = dequantize_qasymm8(constant_border_value, src.quantization_info()); SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners); - dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info()); + dst = convert_to_asymmetric<uint8_t>(dst_tmp, output_quantization_info); } else { @@ -209,7 +209,7 @@ SimpleTensor<uint8_t> scale(const 
SimpleTensor<uint8_t> &src, float scale_x, flo template <> SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int8_t constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners) + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info) { SimpleTensor<int8_t> dst; if(src.quantization_info().uniform().scale != 0.f) @@ -217,7 +217,7 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float SimpleTensor<float> src_tmp = convert_from_asymmetric(src); float constant_border_value_f = dequantize_qasymm8_signed(constant_border_value, src.quantization_info()); SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners); - dst = convert_to_asymmetric<int8_t>(dst_tmp, src.quantization_info()); + dst = convert_to_asymmetric<int8_t>(dst_tmp, output_quantization_info); } else { @@ -227,11 +227,11 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float } template SimpleTensor<int16_t> scale(const SimpleTensor<int16_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int16_t constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners); + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info); template SimpleTensor<half> scale(const SimpleTensor<half> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, half constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners); + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info); template 
SimpleTensor<float> scale(const SimpleTensor<float> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, float constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners); + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Scale.h b/tests/validation/reference/Scale.h index c66af8d94e..c32c07d1c0 100644 --- a/tests/validation/reference/Scale.h +++ b/tests/validation/reference/Scale.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,7 +37,7 @@ namespace reference { template <typename T> SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0, - SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false); + SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false, QuantizationInfo output_quantization_info = QuantizationInfo()); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ScatterLayer.cpp b/tests/validation/reference/ScatterLayer.cpp new file mode 100644 index 0000000000..55c48a9002 --- /dev/null +++ b/tests/validation/reference/ScatterLayer.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "ScatterLayer.h" +#include "tests/validation/Helpers.h" +#include "arm_compute/core/TensorShape.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +namespace +{ + +template <typename T> +T reduce_op(const T &current,const T &update,const ScatterFunction func) +{ + switch(func) + { + case ScatterFunction::Update: + return update; + break; + case ScatterFunction::Add: + return current + update; + break; + case ScatterFunction::Sub: + return current - update; + break; + case ScatterFunction::Max: + return std::max(current, update); + break; + case ScatterFunction::Min: + return std::min(current, update); + break; + default: + ARM_COMPUTE_ERROR("Unsupported Scatter function"); + break; + } +} + +template float reduce_op(const float &current,const float &update,const ScatterFunction func); +template half reduce_op(const half &current,const half &update,const ScatterFunction func); +} + +// NOTE: This function expects collapsed tensors as input. +// Batch dims for update/indices tensors should be collapsed into a single dim. +// Data dims should be collapsed into a single dim for both update and src tensors prior to calling this function. +template <typename T> +SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info) +{ + // 1. If zero initialization variable is false, copy src data to dst. + SimpleTensor<T> dst{ out_shape, src.data_type(), 1 }; + if(!info.zero_initialization) + { + std::copy_n(src.data(), src.num_elements(), dst.data()); + } + + // Number of elements between each value of the dim being iterated through + const unsigned int data_stride = updates.shape().total_size_lower(updates.shape().num_dimensions() - 1); + const unsigned int no_output_dims = out_shape.num_dimensions(); + + // Calculate output stride at given index for all output dims.
+ std::vector<unsigned int> out_stride_at_idx(no_output_dims); + for (unsigned int i = 0 ; i < no_output_dims; i++) + { + out_stride_at_idx[i] = out_shape.total_size_lower(i); + } + + const unsigned int indices_x_dim = static_cast<unsigned int>(indices.shape()[0]); + const unsigned int indices_y_dim = static_cast<unsigned int>(indices.shape()[1]); + + // 2. Iterate over indices tensor y-dim and replace sections of dst tensor with relevant areas of update tensor. + for(unsigned int i = 0; i < indices_y_dim; i++) + { + // NOTE : Currently, indices.shape() == [X, Y, 1, 1], where X is the indices dim and Y is the batch dim + // Starting index for both the update and indices tensors. + const unsigned int update_dim_start = i * data_stride; + const unsigned int indices_dim_start = i * indices_x_dim; + bool out_of_bounds = false; + unsigned int out_offset_acc = 0; + + // Iterate over each indices value for the relevant batch and accumulate the offset. + for(unsigned int j = 0; j < indices_x_dim; j++) + { + // Get first index value with i * indices_x_dim (iterating through y-dim/batch idx), then iterate through x dim by adding k + const int index_value = indices[indices_dim_start + j]; + const unsigned int out_dim = no_output_dims - (j+1); // Calculate corresponding output dim to current index value. 
+ if(index_value < static_cast<int>(out_shape[out_dim]) && index_value >= 0) + { + out_offset_acc += (index_value * out_stride_at_idx[out_dim]); // offset accumulation + } + else + { + out_of_bounds = true; + break; + } + } + + // If not out of bounds, copy update tensor elements to output + if(!out_of_bounds) + { + for (unsigned int j = 0 ; j < data_stride; j++) + { + dst[out_offset_acc + j] = reduce_op(dst[out_offset_acc + j], updates[update_dim_start + j], info.func); + } + } + } + return dst; +} + +template <typename T> +SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info) +{ + return scatter_layer_internal<T>(src, updates, indices, out_shape, info); +} + +template SimpleTensor<float> scatter_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<half> scatter_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int32_t> scatter_layer(const SimpleTensor<int32_t> &src, const SimpleTensor<int32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint32_t> scatter_layer(const SimpleTensor<uint32_t> &src, const SimpleTensor<uint32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int16_t> scatter_layer(const SimpleTensor<int16_t> &src, const SimpleTensor<int16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint16_t> scatter_layer(const SimpleTensor<uint16_t> &src, const SimpleTensor<uint16_t> &updates, const SimpleTensor<int32_t> 
&indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<int8_t> scatter_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +template SimpleTensor<uint8_t> scatter_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/ScatterLayer.h b/tests/validation/reference/ScatterLayer.h new file mode 100644 index 0000000000..97d5e70b0d --- /dev/null +++ b/tests/validation/reference/ScatterLayer.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H +#define ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H + +#include "Utils.h" +#include "arm_compute/function_info/ScatterInfo.h" +#include "tests/SimpleTensor.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template <typename T> +SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info); + +template <typename T> +SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H diff --git a/tests/validation/reference/UtilsQuantizedAsymm.h b/tests/validation/reference/UtilsQuantizedAsymm.h index 1f593bb696..e5ecc66545 100644 --- a/tests/validation/reference/UtilsQuantizedAsymm.h +++ b/tests/validation/reference/UtilsQuantizedAsymm.h @@ -32,6 +32,22 @@ namespace test { namespace validation { +namespace +{ +#if __clang__ +// This has been tested on clang 7.0.2 (__clang_major__ == 7 && __clang_minor__ == 0 && __clang_patchlevel__ == 2) +inline int64_t to_int64(int32_t val) +{ + return static_cast<int64_t>(val) | ((val < 0) ? (((1ll << 32) - 1) << 32) : 0); +} +#else // __clang__ +inline int64_t to_int64(int32_t val) +{ + return static_cast<int64_t>(val); +} +#endif // __clang__ +} // namespace + /** Rounded to nearest division by a power-of-two. */ inline int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent) { @@ -43,12 +59,12 @@ inline int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent) /** Multiplication of two integers. The same as ARMv7 Arm® Neon™ VQRDMULH instruction. 
*/ inline int32_t asymm_int_mult(int32_t a, int32_t b) { - bool overflow = a == b && a == std::numeric_limits<int32_t>::min(); - int64_t a_64(a); - int64_t b_64(b); - int64_t ab_64 = a_64 * b_64; - int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); - int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31)); + const bool overflow = a == b && a == std::numeric_limits<int32_t>::min(); + const int64_t a_64 = to_int64(a); + const int64_t b_64 = to_int64(b); + const int64_t ab_64 = a_64 * b_64; + const int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); + const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31)); return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32; } |