Diffstat (limited to 'tests/validation/reference')
-rw-r--r--  tests/validation/reference/ActivationLayer.cpp | 27
-rw-r--r--  tests/validation/reference/ActivationLayer.h | 29
-rw-r--r--  tests/validation/reference/BatchToSpaceLayer.cpp | 51
-rw-r--r--  tests/validation/reference/BatchToSpaceLayer.h | 5
-rw-r--r--  tests/validation/reference/Conv3D.cpp | 260
-rw-r--r--  tests/validation/reference/Conv3D.h | 47
-rw-r--r--  tests/validation/reference/Convolution3d.h | 4
-rw-r--r--  tests/validation/reference/DFT.cpp | 8
-rw-r--r--  tests/validation/reference/DepthConvertLayer.cpp | 23
-rw-r--r--  tests/validation/reference/DequantizationLayer.cpp | 9
-rw-r--r--  tests/validation/reference/ElementwiseOperations.cpp | 11
-rw-r--r--  tests/validation/reference/ElementwiseUnary.cpp | 109
-rw-r--r--  tests/validation/reference/ElementwiseUnary.h | 4
-rw-r--r--  tests/validation/reference/FullyConnectedLayer.cpp | 4
-rw-r--r--  tests/validation/reference/GEMM.cpp | 95
-rw-r--r--  tests/validation/reference/GEMM.h | 11
-rw-r--r--  tests/validation/reference/GEMMLowp.cpp | 12
-rw-r--r--  tests/validation/reference/GEMMLowp.h | 11
-rw-r--r--  tests/validation/reference/Gather.cpp | 53
-rw-r--r--  tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp | 98
-rw-r--r--  tests/validation/reference/IndirectConv2dAddressPrecalculation.h | 44
-rw-r--r--  tests/validation/reference/MeanStdDevNormalizationLayer.cpp | 11
-rw-r--r--  tests/validation/reference/Permute.cpp | 18
-rw-r--r--  tests/validation/reference/Pooling3dLayer.cpp | 220
-rw-r--r--  tests/validation/reference/Pooling3dLayer.h | 50
-rw-r--r--  tests/validation/reference/PoolingLayer.cpp | 21
-rw-r--r--  tests/validation/reference/QuantizationLayer.cpp | 2
-rw-r--r--  tests/validation/reference/ReductionOperation.cpp | 79
-rw-r--r--  tests/validation/reference/ReductionOperation.h | 7
-rw-r--r--  tests/validation/reference/Remap.cpp | 112
-rw-r--r--  tests/validation/reference/Reorder.cpp | 156
-rw-r--r--  tests/validation/reference/Reorder.h (renamed from tests/validation/reference/Remap.h) | 12
-rw-r--r--  tests/validation/reference/ReshapeLayer.cpp | 15
-rw-r--r--  tests/validation/reference/Reverse.cpp | 35
-rw-r--r--  tests/validation/reference/Reverse.h | 10
-rw-r--r--  tests/validation/reference/Scale.cpp | 20
-rw-r--r--  tests/validation/reference/Scale.h | 4
-rw-r--r--  tests/validation/reference/ScatterLayer.cpp | 152
-rw-r--r--  tests/validation/reference/ScatterLayer.h | 48
-rw-r--r--  tests/validation/reference/UtilsQuantizedAsymm.h | 28
40 files changed, 1595 insertions(+), 320 deletions(-)
diff --git a/tests/validation/reference/ActivationLayer.cpp b/tests/validation/reference/ActivationLayer.cpp
index 664b969125..2172362bdd 100644
--- a/tests/validation/reference/ActivationLayer.cpp
+++ b/tests/validation/reference/ActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#include "ActivationLayer.h"
#include "arm_compute/core/Types.h"
+
#include "tests/validation/Helpers.h"
namespace arm_compute
@@ -40,7 +41,7 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo
ARM_COMPUTE_UNUSED(oq_info);
// Create reference
- SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 };
+ SimpleTensor<T> dst{src.shape(), src.data_type(), 1};
// Compute reference
const T a(info.a());
@@ -48,7 +49,7 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo
#if defined(_OPENMP)
#pragma omp parallel for
#endif /* _OPENMP */
- for(int i = 0; i < src.num_elements(); ++i)
+ for (int i = 0; i < src.num_elements(); ++i)
{
dst[i] = activate_float<T>(src[i], a, b, info.activation());
}
@@ -57,7 +58,8 @@ SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo
}
template <>
-SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
+SimpleTensor<uint8_t>
+activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
{
const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
@@ -68,7 +70,8 @@ SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src
}
template <>
-SimpleTensor<int8_t> activation_layer<int8_t>(const SimpleTensor<int8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
+SimpleTensor<int8_t>
+activation_layer<int8_t>(const SimpleTensor<int8_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
{
const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
@@ -79,7 +82,8 @@ SimpleTensor<int8_t> activation_layer<int8_t>(const SimpleTensor<int8_t> &src, A
}
template <>
-SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
+SimpleTensor<int16_t>
+activation_layer<int16_t>(const SimpleTensor<int16_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info)
{
const QuantizationInfo dst_qinfo = oq_info.empty() ? src.quantization_info() : oq_info;
@@ -88,9 +92,14 @@ SimpleTensor<int16_t> activation_layer<int16_t>(const SimpleTensor<int16_t> &src
SimpleTensor<int16_t> dst = convert_to_symmetric<int16_t>(dst_tmp, dst_qinfo);
return dst;
}
-template SimpleTensor<int32_t> activation_layer(const SimpleTensor<int32_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
-template SimpleTensor<float> activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
-template SimpleTensor<half> activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
+template SimpleTensor<int32_t>
+activation_layer(const SimpleTensor<int32_t> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
+template SimpleTensor<float>
+activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
+template SimpleTensor<half>
+activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
+template SimpleTensor<bfloat16>
+activation_layer(const SimpleTensor<bfloat16> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/ActivationLayer.h b/tests/validation/reference/ActivationLayer.h
index 8aad1af63e..7f896bd696 100644
--- a/tests/validation/reference/ActivationLayer.h
+++ b/tests/validation/reference/ActivationLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020,2022,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_ACTIVATION_LAYER_H
-#define ARM_COMPUTE_TEST_ACTIVATION_LAYER_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H
+#define ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -40,7 +40,7 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a
{
T ret;
- switch(activation)
+ switch (activation)
{
case ActivationLayerInfo::ActivationFunction::ABS:
ret = std::abs(x);
@@ -61,13 +61,13 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a
ret = std::min<T>(a, std::max<T>(b, x));
break;
case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
- ret = (x > 0) ? x : a * x;
+ ret = x > static_cast<T>(0) ? x : static_cast<T>(a * x);
break;
case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
- ret = std::log(static_cast<T>(1) + std::exp(x));
+ ret = std::log(static_cast<T>(1) + std::exp(static_cast<double>(x)));
break;
case ActivationLayerInfo::ActivationFunction::ELU:
- ret = (x > 0) ? x : a * (std::exp(x) - static_cast<T>(1));
+ ret = x > static_cast<T>(0) ? x : static_cast<T>(a * (std::exp(x) - static_cast<T>(1)));
break;
case ActivationLayerInfo::ActivationFunction::SQRT:
ret = std::sqrt(x);
@@ -82,7 +82,14 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a
ret = x;
break;
case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
- ret = x * ((std::min(std::max(static_cast<T>(x + 3), static_cast<T>(0.0f)), static_cast<T>(6.0f))) * 0.166666667f);
+ ret = x * ((std::min(std::max(static_cast<T>(x + 3), static_cast<T>(0.0f)), static_cast<T>(6.0f))) *
+ 0.166666667f);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SWISH:
+ ret = static_cast<T>(x) / (static_cast<T>(1) + std::exp(-a * x));
+ break;
+ case ActivationLayerInfo::ActivationFunction::GELU:
+ ret = x * 0.5f * (1 + erf(x / std::sqrt(2.0f)));
break;
default:
ARM_COMPUTE_ERROR("Unsupported activation function");
@@ -93,9 +100,11 @@ inline T activate_float(T x, T a, T b, ActivationLayerInfo::ActivationFunction a
}
template <typename T>
-SimpleTensor<T> activation_layer(const SimpleTensor<T> &src, ActivationLayerInfo info, const QuantizationInfo &oq_info = QuantizationInfo());
+SimpleTensor<T> activation_layer(const SimpleTensor<T> &src,
+ ActivationLayerInfo info,
+ const QuantizationInfo &oq_info = QuantizationInfo());
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_ACTIVATION_LAYER_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_ACTIVATIONLAYER_H
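
Note: the SWISH and GELU branches added above follow the closed forms SWISH(x) = x / (1 + exp(-a*x)) and GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))). A minimal standalone sketch of the same arithmetic in plain float, assuming only <cmath> (the helper names swish_ref and gelu_ref are illustrative and not part of the patch):

#include <cmath>

// SWISH: x scaled by the logistic sigmoid of a*x.
static float swish_ref(float x, float a)
{
    return x / (1.0f + std::exp(-a * x));
}

// GELU using the exact erf formulation, as in the reference above (no tanh approximation).
static float gelu_ref(float x)
{
    return 0.5f * x * (1.0f + std::erf(x / std::sqrt(2.0f)));
}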
diff --git a/tests/validation/reference/BatchToSpaceLayer.cpp b/tests/validation/reference/BatchToSpaceLayer.cpp
index 404ee73cac..63d121f59b 100644
--- a/tests/validation/reference/BatchToSpaceLayer.cpp
+++ b/tests/validation/reference/BatchToSpaceLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,10 @@
*/
#include "BatchToSpaceLayer.h"
+#include "arm_compute/core/Validate.h"
#include "tests/validation/Helpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
namespace arm_compute
{
namespace test
@@ -35,32 +37,37 @@ namespace reference
{
// Batch to Space
template <typename T>
-SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape)
+SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape)
{
- ARM_COMPUTE_ERROR_ON(block_shape[0] <= 0);
- ARM_COMPUTE_ERROR_ON(block_shape[1] <= 0);
- SimpleTensor<T> result(dst_shape, src.data_type());
+ ARM_COMPUTE_ERROR_ON(block_shape[0] < 1);
+ ARM_COMPUTE_ERROR_ON(block_shape[1] < 1);
+ const auto expected_dst_shape = misc::shape_calculator::compute_batch_to_space_shape(DataLayout::NCHW, src.shape(), block_shape[0], block_shape[1], crop_info);
+ ARM_COMPUTE_ERROR_ON(arm_compute::detail::have_different_dimensions(expected_dst_shape, dst_shape, 0));
+ ARM_COMPUTE_UNUSED(expected_dst_shape);
- int in_pos = 0;
- const auto width_in = static_cast<int>(src.shape()[0]);
- const auto height_in = static_cast<int>(src.shape()[1]);
- const auto z_in = static_cast<int>(src.shape()[2]);
- const auto batch_in = static_cast<int>(src.shape()[3]);
+ SimpleTensor<T> result(dst_shape, src.data_type());
+ int out_pos = 0;
+ const auto width_out = static_cast<int>(dst_shape[0]);
+ const auto height_out = static_cast<int>(dst_shape[1]);
+ const auto z_out = static_cast<int>(dst_shape[2]);
+ const auto batch_out = static_cast<int>(dst_shape[3]);
- for(int batch = 0; batch < batch_in; ++batch)
+ for(int batch = 0; batch < batch_out; ++batch)
{
- for(int z = 0; z < z_in; ++z)
+ for(int z = 0; z < z_out; ++z)
{
- for(int y = 0; y < height_in; ++y)
+ for(int y = 0; y < height_out; ++y)
{
- for(int x = 0; x < width_in; ++x)
+ for(int x = 0; x < width_out; ++x)
{
- const int r = src.shape()[3] / (block_shape[0] * block_shape[1]);
- const int out_x = (block_shape[0] * x + (batch / r) % block_shape[0]);
- const int out_y = (block_shape[1] * y + (batch / r) / block_shape[0]);
- const int out_pos = out_x + dst_shape[0] * out_y + z * dst_shape[0] * dst_shape[1] + (batch % r) * dst_shape[0] * dst_shape[1] * dst_shape[2];
- result[out_pos] = src[in_pos];
- ++in_pos;
+ const int x_c = x + crop_info.left;
+ const int y_c = y + crop_info.top;
+ const int in_batch = batch + ((x_c % block_shape[0]) + (y_c % block_shape[1]) * (block_shape[0])) * dst_shape[3];
+ const int in_x = x_c / block_shape[0];
+ const int in_y = y_c / block_shape[1];
+ const int in_pos = in_x + src.shape()[0] * in_y + z * src.shape()[0] * src.shape()[1] + in_batch * src.shape()[0] * src.shape()[1] * src.shape()[2];
+ result[out_pos] = src[in_pos];
+ ++out_pos;
}
}
}
@@ -68,8 +75,8 @@ SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<in
return result;
}
-template SimpleTensor<float> batch_to_space(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape);
-template SimpleTensor<half> batch_to_space(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape);
+template SimpleTensor<float> batch_to_space(const SimpleTensor<float> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape);
+template SimpleTensor<half> batch_to_space(const SimpleTensor<half> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/BatchToSpaceLayer.h b/tests/validation/reference/BatchToSpaceLayer.h
index 52556cb53f..a37bfc3373 100644
--- a/tests/validation/reference/BatchToSpaceLayer.h
+++ b/tests/validation/reference/BatchToSpaceLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H
#define ARM_COMPUTE_TEST_BATCH_TO_SPACE_LAYER_H
+#include "arm_compute/core/Types.h"
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -36,7 +37,7 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &block_shape, const TensorShape &dst_shape);
+SimpleTensor<T> batch_to_space(const SimpleTensor<T> &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst_shape);
} // namespace reference
} // namespace validation
} // namespace test
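
Note: as a concrete check of the output-driven index arithmetic introduced above (hypothetical sizes, not taken from the patch): with block_shape = {2, 2}, no crop, and an output batch count dst_shape[3] = 2, the output element at (x=3, y=1, z=0, batch=1) gives x_c = 3 and y_c = 1, so in_x = 3 / 2 = 1, in_y = 1 / 2 = 0, and in_batch = 1 + ((3 % 2) + (1 % 2) * 2) * 2 = 7; the value is therefore read from input coordinate (1, 0, 0, 7) of the 8-batch input.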
diff --git a/tests/validation/reference/Conv3D.cpp b/tests/validation/reference/Conv3D.cpp
new file mode 100644
index 0000000000..e4010a507a
--- /dev/null
+++ b/tests/validation/reference/Conv3D.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2021, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Conv3D.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "support/AclRequires.h"
+#include "tests/validation/reference/UtilsQuantizedAsymm.h"
+
+// Source/Destination Tensor shape indices (N D H W C)
+constexpr unsigned int batch_dim = 4u;
+constexpr unsigned int depth_dim = 3u;
+constexpr unsigned int height_dim = 2u;
+constexpr unsigned int width_dim = 1u;
+constexpr unsigned int channel_dim = 0u;
+
+// Weight tensor shape indices (D H W Cin Cout)
+constexpr unsigned int weights_depth_dim = 4u;
+constexpr unsigned int weights_height_dim = 3u;
+constexpr unsigned int weights_width_dim = 2u;
+constexpr unsigned int weights_CHin_dim = 1u;
+constexpr unsigned int weights_CHout_dim = 0u;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+inline bool is_valid_pixel(int i, int min, int max)
+{
+ return (i >= min && i < max);
+}
+
+// Evaluate the weights against an element in a given tensor.
+template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 >
+T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch,
+ int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info)
+{
+ ARM_COMPUTE_UNUSED(oq_info);
+
+ const unsigned int weights_width = weights.shape()[weights_width_dim];
+ const unsigned int weights_height = weights.shape()[weights_height_dim];
+ const unsigned int weights_depth = weights.shape()[weights_depth_dim];
+
+ const unsigned int src_channels = src.shape()[channel_dim];
+ const unsigned int src_width = src.shape()[width_dim];
+ const unsigned int src_height = src.shape()[height_dim];
+ const unsigned int src_depth = src.shape()[depth_dim];
+
+ T total(0);
+ for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d)
+ {
+ const int idx_z = z_start + dilation.depth * weight_d;
+ for(unsigned int weight_y = 0; weight_y < weights_height; ++weight_y)
+ {
+ const int idx_y = y_start + dilation.height * weight_y;
+ for(unsigned int weight_x = 0; weight_x < weights_width; ++weight_x)
+ {
+ const int idx_x = x_start + dilation.width * weight_x;
+
+ //Check if the point is within padding
+ const bool is_x_valid = is_valid_pixel(idx_x, 0, src_width);
+ const bool is_y_valid = is_valid_pixel(idx_y, 0, src_height);
+ const bool is_z_valid = is_valid_pixel(idx_z, 0, src_depth);
+ const bool is_invalid_pixel = !(is_x_valid && is_y_valid && is_z_valid);
+ if(is_invalid_pixel)
+ {
+ continue;
+ }
+
+ for(unsigned int ch_in = 0; ch_in < src_channels; ++ch_in)
+ {
+ const T *in_ptr = src.data();
+ const T *w_ptr = weights.data();
+
+ const int in_offset = coord2index(src.shape(), Coordinates{ ch_in, idx_x, idx_y, idx_z, batch });
+ const int weight_offset = coord2index(weights.shape(), Coordinates{ ch_out, ch_in, weight_x, weight_y, weight_d });
+ T input_value = in_ptr[in_offset];
+ T weight_value = w_ptr[weight_offset];
+ total += (input_value * weight_value);
+ }
+ }
+ }
+ }
+
+ const TB *b_ptr = bias.data();
+ TB bias_value = b_ptr[ch_out];
+
+ return total + bias_value;
+}
+
+template < typename T, typename TB, ARM_COMPUTE_REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
+T calculate_conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const Size3D &dilation, int batch,
+ int z_start, int y_start, int x_start, int ch_out, UniformQuantizationInfo oq_info)
+{
+ const unsigned int weights_width = weights.shape()[weights_width_dim];
+ const unsigned int weights_height = weights.shape()[weights_height_dim];
+ const unsigned int weights_depth = weights.shape()[weights_depth_dim];
+
+ const unsigned int src_channels = src.shape()[channel_dim];
+ const unsigned int src_width = src.shape()[width_dim];
+ const unsigned int src_height = src.shape()[height_dim];
+ const unsigned int src_depth = src.shape()[depth_dim];
+
+ const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+
+ const int input_offset = -iq_info.offset;
+ const float input_scale = iq_info.scale;
+ int weights_offset = -wq_info.offset;
+ float weights_scale = wq_info.scale;
+ const int output_offset = oq_info.offset;
+ const float output_scale = oq_info.scale;
+
+ int output_multiplier = 0;
+ int output_shift = 0;
+ const float multiplier = input_scale * weights_scale / output_scale;
+ arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
+
+ int32_t total(0);
+ for(unsigned int weight_d = 0; weight_d < weights_depth; ++weight_d)
+ {
+ const int idx_z = z_start + dilation.depth * weight_d;
+ for(unsigned int weight_y = 0; weight_y < weights_height; ++weight_y)
+ {
+ const int idx_y = y_start + dilation.height * weight_y;
+ for(unsigned int weight_x = 0; weight_x < weights_width; ++weight_x)
+ {
+ const int idx_x = x_start + dilation.width * weight_x;
+
+ //Check if the point is within padding
+ const bool is_x_valid = is_valid_pixel(idx_x, 0, src_width);
+ const bool is_y_valid = is_valid_pixel(idx_y, 0, src_height);
+ const bool is_z_valid = is_valid_pixel(idx_z, 0, src_depth);
+ const bool is_invalid_pixel = !(is_x_valid && is_y_valid && is_z_valid);
+ if(is_invalid_pixel)
+ {
+ continue;
+ }
+
+ for(unsigned int ch_in = 0; ch_in < src_channels; ++ch_in)
+ {
+ const T *in_ptr = src.data();
+ const T *w_ptr = weights.data();
+
+ const int in_offset = coord2index(src.shape(), Coordinates{ ch_in, idx_x, idx_y, idx_z, batch });
+ const int weight_offset = coord2index(weights.shape(), Coordinates{ ch_out, ch_in, weight_x, weight_y, weight_d });
+ T input_value = in_ptr[in_offset];
+ T weight_value = w_ptr[weight_offset];
+ total += ((input_value + input_offset) * (weight_value + weights_offset));
+ }
+ }
+ }
+ }
+
+ const TB *b_ptr = bias.data();
+ TB bias_value = b_ptr[ch_out];
+
+ total += bias_value;
+
+ return validation::quantize_down_scale_by_fixedpoint(total, output_multiplier, output_shift, output_offset,
+ std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max());
+}
+} // namespace
+
+template <typename T, typename TB>
+SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const Conv3dInfo &conv3d_info)
+{
+ // Compute reference
+ const unsigned int batch_size = src.shape()[batch_dim];
+ const unsigned int dst_width = dst.shape()[width_dim];
+ const unsigned int dst_height = dst.shape()[height_dim];
+ const unsigned int dst_depth = dst.shape()[depth_dim];
+ const unsigned int src_channels = src.shape()[channel_dim];
+ const unsigned int weights_out_ch = weights.shape()[weights_CHout_dim];
+ const unsigned int dst_channels = dst.shape()[channel_dim];
+ const size_t pad_left = conv3d_info.padding.left;
+ const size_t pad_top = conv3d_info.padding.top;
+ const size_t pad_front = conv3d_info.padding.front;
+ const size_t stride_x = conv3d_info.stride.x();
+ const size_t stride_y = conv3d_info.stride.y();
+ const size_t stride_z = conv3d_info.stride.z();
+
+ const TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_conv3d_shape(src.shape(), weights.shape(), conv3d_info);
+
+ ARM_COMPUTE_UNUSED(src_channels, weights_out_ch, dst_channels, dst_shape, weights_CHin_dim);
+ // Number of batches of source and destination tensors must match.
+ ARM_COMPUTE_ERROR_ON(src.shape()[batch_dim] != dst.shape()[batch_dim]);
+ // Input channels in the source and weights must match.
+ ARM_COMPUTE_ERROR_ON(src_channels != weights.shape()[weights_CHin_dim]);
+ // Weight channels in the destination and weights must match.
+ ARM_COMPUTE_ERROR_ON(weights_out_ch != dst_channels);
+ // Bias must match the number of destination channels.
+ ARM_COMPUTE_ERROR_ON(bias.shape()[0] != dst_channels);
+ // Compare given dst tensor shape with expected shape.
+ ARM_COMPUTE_ERROR_ON(dst.shape() != dst_shape);
+
+ for(unsigned int batch = 0; batch < batch_size; ++batch)
+ {
+ for(unsigned int z_out = 0; z_out < dst_depth; ++z_out)
+ {
+ const int z_start = (z_out * stride_z) - pad_front;
+ for(unsigned int y_out = 0; y_out < dst_height; ++y_out)
+ {
+ const int y_start = (y_out * stride_y) - pad_top;
+ for(unsigned int x_out = 0; x_out < dst_width; ++x_out)
+ {
+ const int x_start = (x_out * stride_x) - pad_left;
+ for(unsigned int ch_out = 0; ch_out < dst_channels; ++ch_out)
+ {
+ T *out_ptr = dst.data();
+
+ const int out_offset = coord2index(dst.shape(), Coordinates{ ch_out, x_out, y_out, z_out, batch });
+ out_ptr[out_offset] = calculate_conv3d<T, TB>(src, weights, bias, conv3d_info.dilation, batch, z_start, y_start, x_start, ch_out, dst.quantization_info().uniform());
+ }
+ }
+ }
+ }
+ }
+ return dst;
+}
+
+template SimpleTensor<float> conv3d(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, SimpleTensor<float> &dst,
+ const Conv3dInfo &conv3d_info);
+template SimpleTensor<half> conv3d(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, SimpleTensor<half> &dst,
+ const Conv3dInfo &conv3d_info);
+template SimpleTensor<uint8_t> conv3d(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst,
+ const Conv3dInfo &conv3d_info);
+template SimpleTensor<int8_t> conv3d(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<int8_t> &dst,
+ const Conv3dInfo &conv3d_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
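
Note: the quantized overload above folds the three scales into one multiplier (input_scale * weights_scale / output_scale), converts it to a fixed-point multiplier and shift with calculate_quantized_multiplier, and requantizes the int32 accumulator with quantize_down_scale_by_fixedpoint. A float-domain sketch of the equivalent requantization step, for illustration only (the helper requantize_ref below is hypothetical, not a library function):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Requantize an int32 accumulator to int8 using plain float arithmetic;
// the reference code performs the same step with a fixed-point multiplier and shift.
static int8_t requantize_ref(int32_t acc, float in_scale, float w_scale, float out_scale, int32_t out_offset)
{
    const float   multiplier = (in_scale * w_scale) / out_scale;
    const int32_t value      = static_cast<int32_t>(std::lround(acc * multiplier)) + out_offset;
    return static_cast<int8_t>(std::max<int32_t>(-128, std::min<int32_t>(127, value)));
}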
diff --git a/tests/validation/reference/Conv3D.h b/tests/validation/reference/Conv3D.h
new file mode 100644
index 0000000000..e3674f4bfb
--- /dev/null
+++ b/tests/validation/reference/Conv3D.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_CONV3D_LAYER_H
+#define ARM_COMPUTE_TEST_CONV3D_LAYER_H
+
+#include "Utils.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename TB>
+SimpleTensor<T> conv3d(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst,
+ const Conv3dInfo &conv3d_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_CONV3D_LAYER_H */
diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h
index 1666e3857b..b67e88e839 100644
--- a/tests/validation/reference/Convolution3d.h
+++ b/tests/validation/reference/Convolution3d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,7 @@
#define ARM_COMPUTE_TEST_VALIDATION_CONVOLUTION_H
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "support/Requires.h"
+#include "support/AclRequires.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"
diff --git a/tests/validation/reference/DFT.cpp b/tests/validation/reference/DFT.cpp
index fd126c7d73..2b03c270ac 100644
--- a/tests/validation/reference/DFT.cpp
+++ b/tests/validation/reference/DFT.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -400,10 +400,10 @@ SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w,
auto padded_src = pad_layer(src, padding_in);
// Flip weights
- std::vector<uint32_t> axis_v = { 0, 1 };
- SimpleTensor<uint32_t> axis{ TensorShape(2U), DataType::U32 };
+ std::vector<uint32_t> axis_v = { 0, 1 };
+ SimpleTensor<int32_t> axis{ TensorShape(2U), DataType::S32 };
std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data());
- auto flipped_w = reverse(w, axis);
+ auto flipped_w = reverse(w, axis, /* use_inverted_axis */ false);
// Pad weights to have the same size as input
const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } };
diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp
index 94c719ade7..3f88897f8e 100644
--- a/tests/validation/reference/DepthConvertLayer.cpp
+++ b/tests/validation/reference/DepthConvertLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -165,7 +165,7 @@ template SimpleTensor<half> depth_convert(const SimpleTensor<int32_t> &src, Data
template SimpleTensor<float> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
// BFLOAT16
-template SimpleTensor<float> depth_convert(const SimpleTensor<bfloat16> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<bfloat16> depth_convert(const SimpleTensor<bfloat16> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
// F16
template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
@@ -186,6 +186,25 @@ template SimpleTensor<int32_t> depth_convert(const SimpleTensor<float> &src, Dat
template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<bfloat16> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+// S64
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// U64
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint64_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index 64a89aa6a0..67d69c2c38 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -59,6 +59,12 @@ TOut dequantize(int16_t val, const UniformQuantizationInfo qinfo, DataType dt)
ARM_COMPUTE_UNUSED(dt);
return static_cast<TOut>(dequantize_qsymm16(val, qinfo));
}
+template <typename TOut>
+TOut dequantize(int32_t val, const UniformQuantizationInfo qinfo, DataType dt)
+{
+ ARM_COMPUTE_UNUSED(dt);
+ return static_cast<TOut>(dequantize_s32(val, qinfo));
+}
} // namespace
template <typename TOut, typename TIn>
SimpleTensor<TOut> dequantization_layer(const SimpleTensor<TIn> &src)
@@ -115,6 +121,7 @@ template SimpleTensor<half> dequantization_layer(const SimpleTensor<int8_t> &src
template SimpleTensor<float> dequantization_layer(const SimpleTensor<int8_t> &src);
template SimpleTensor<half> dequantization_layer(const SimpleTensor<int16_t> &src);
template SimpleTensor<float> dequantization_layer(const SimpleTensor<int16_t> &src);
+template SimpleTensor<float> dequantization_layer(const SimpleTensor<int32_t> &src);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp
index f22c84e153..edbbab8600 100644
--- a/tests/validation/reference/ElementwiseOperations.cpp
+++ b/tests/validation/reference/ElementwiseOperations.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -74,15 +74,6 @@ T arithm_op(ArithmeticOperation op, T src1, T src2, ConvertPolicy convert_policy
case ArithmeticOperation::DIV:
{
val = (static_cast<intermediate_type>(src1) / static_cast<intermediate_type>(src2));
- if(std::is_integral<T>::value)
- {
- // Implement flooring division
- val = (src2 == 0) ? 0 : val;
- if(static_cast<int32_t>(src1) % static_cast<int32_t>(src2) != 0 && ((src1 < 0) != (src2 < 0)))
- {
- --val;
- }
- }
break;
}
case ArithmeticOperation::POWER:
diff --git a/tests/validation/reference/ElementwiseUnary.cpp b/tests/validation/reference/ElementwiseUnary.cpp
index 5333b53c15..558f9d24fc 100644
--- a/tests/validation/reference/ElementwiseUnary.cpp
+++ b/tests/validation/reference/ElementwiseUnary.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,8 @@
* SOFTWARE.
*/
#include "ElementwiseUnary.h"
-
+#include "tests/validation/Helpers.h"
+#include "utils/TypePrinter.h"
namespace arm_compute
{
namespace test
@@ -32,10 +33,8 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op)
+SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, SimpleTensor<T> &dst, ElementWiseUnary op)
{
- SimpleTensor<T> dst(src.shape(), src.data_type());
-
for(int i = 0; i < src.num_elements(); ++i)
{
switch(op)
@@ -65,13 +64,107 @@ SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary o
ARM_COMPUTE_ERROR("Not implemented");
}
}
+ return dst;
+}
+template <>
+SimpleTensor<int8_t> elementwise_unary(const SimpleTensor<int8_t> &src, SimpleTensor<int8_t> &dst, ElementWiseUnary op)
+{
+ if(dst.data_type() == DataType::QASYMM8_SIGNED)
+ {
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp(src.shape(), DataType::F32);
+ for(int i = 0; i < src.num_elements(); ++i)
+ {
+ switch(op)
+ {
+ case ElementWiseUnary::RSQRT:
+ if(src_tmp[i] != 0)
+ {
+ dst_tmp[i] = 1.f / std::sqrt(src_tmp[i]);
+ }
+ else
+ {
+ // rsqrt(0) give 'inf' so set to the maximum in int8: 127
+ dst_tmp[i] = (127.0f - dst.quantization_info().uniform().offset) * dst.quantization_info().uniform().scale ;
+ }
+ break;
+
+ case ElementWiseUnary::LOG:
+ if(src_tmp[i] != 0)
+ {
+ dst_tmp[i] = std::log(src_tmp[i]);
+ }
+ else
+ {
+ dst_tmp[i] = (-128.0f - dst.quantization_info().uniform().offset) * dst.quantization_info().uniform().scale ;
+ }
+ break;
+
+ default:
+ elementwise_unary(src_tmp, dst_tmp, op);
+ break;
+ }
+ }
+ dst = convert_to_asymmetric<int8_t>(dst_tmp, dst.quantization_info());
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not implemented");
+ }
+ return dst;
+}
+template <>
+SimpleTensor<uint8_t> elementwise_unary(const SimpleTensor<uint8_t> &src, SimpleTensor<uint8_t> &dst, ElementWiseUnary op)
+{
+ if(dst.data_type() == DataType::QASYMM8)
+ {
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp(src.shape(), DataType::F32);
+ for(int i = 0; i < src.num_elements(); ++i)
+ {
+ switch(op)
+ {
+ case ElementWiseUnary::RSQRT:
+ if(src_tmp[i] != 0)
+ {
+ dst_tmp[i] = 1.f / std::sqrt(src_tmp[i]);
+ }
+ else
+ {
+ // rsqrt(0) give 'inf' so set to the maximum in uint8: 255
+ dst_tmp[i] = (255.0f - dst.quantization_info().uniform().offset)* dst.quantization_info().uniform().scale;
+ }
+ break;
+ case ElementWiseUnary::LOG:
+ if(src_tmp[i] != 0)
+ {
+ dst_tmp[i] = std::log(src_tmp[i]);
+ }
+ else
+ {
+ dst_tmp[i] = -dst.quantization_info().uniform().offset * dst.quantization_info().uniform().scale;
+ }
+ break;
+
+ default:
+ elementwise_unary(src_tmp, dst_tmp, op);
+ break;
+ }
+ }
+ dst = convert_to_asymmetric<uint8_t>(dst_tmp, dst.quantization_info());
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not implemented");
+ }
return dst;
}
-template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, ElementWiseUnary op);
-template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, ElementWiseUnary op);
-template SimpleTensor<int32_t> elementwise_unary(const SimpleTensor<int32_t> &src, ElementWiseUnary op);
+template SimpleTensor<float> elementwise_unary(const SimpleTensor<float> &src, SimpleTensor<float> &dst, ElementWiseUnary op);
+template SimpleTensor<half> elementwise_unary(const SimpleTensor<half> &src, SimpleTensor<half> &dst, ElementWiseUnary op);
+template SimpleTensor<int32_t> elementwise_unary(const SimpleTensor<int32_t> &src, SimpleTensor<int32_t> &dst, ElementWiseUnary op);
+
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/ElementwiseUnary.h b/tests/validation/reference/ElementwiseUnary.h
index be4a229a5b..ae7a49bce4 100644
--- a/tests/validation/reference/ElementwiseUnary.h
+++ b/tests/validation/reference/ElementwiseUnary.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,7 +35,7 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, ElementWiseUnary op);
+SimpleTensor<T> elementwise_unary(const SimpleTensor<T> &src, SimpleTensor<T> &dst, ElementWiseUnary op);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
index 21333958f8..af30e9ee54 100644
--- a/tests/validation/reference/FullyConnectedLayer.cpp
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -123,7 +123,7 @@ SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTe
// Create reference
SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, out_quant_info };
- // Sanity checks
+ // Health checks
const int num_batch_dimensions = std::max(0, static_cast<int>(dst_shape.num_dimensions()) - 1);
const int num_input_dimensions = src.shape().num_dimensions() - num_batch_dimensions;
const unsigned int linear_input_size = src.shape().total_size_lower(num_input_dimensions);
diff --git a/tests/validation/reference/GEMM.cpp b/tests/validation/reference/GEMM.cpp
index 6b3aa390f0..d513343796 100644
--- a/tests/validation/reference/GEMM.cpp
+++ b/tests/validation/reference/GEMM.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,6 +25,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
+#include "tests/validation/reference/ArithmeticOperations.h"
namespace arm_compute
{
@@ -35,10 +36,11 @@ namespace validation
namespace reference
{
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
+SimpleTensor<T>
+gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
{
// Create reference
- SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 };
+ SimpleTensor<T> dst{c.shape(), c.data_type(), 1};
// Compute reference
const int M = a.shape().y();
@@ -50,30 +52,47 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S
const int a_stride_z = K * M;
const int a_stride_w = K * M * D;
- const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions
- const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
+ const int b_stride_z =
+ b.shape().num_dimensions() > 2
+ ? N * K
+ : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions
+ int b_stride_w =
+ b.shape().num_dimensions() > 3
+ ? K * N * D
+ : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
+
+ // Note: There are 3 gemm types: batched-gemm, multi-gemm, and batched of multi-gemms. The third dimension of tensor b is overloaded when tensor b has exactly 3 dimensions:
+ // it can be either number of batches or multis. Batched-GEMM computation is detected only when the third dimension of "a" and "c" tensors is 1 and the number of dimensions is 4
+ const bool is_batched_gemm = b.shape().num_dimensions() == 3 && a.shape().num_dimensions() == 4 &&
+ c.shape().num_dimensions() == 4 && a.shape()[2] == 1 && c.shape()[2] == 1;
+
+ // Batched-GEMM
+ if (is_batched_gemm)
+ {
+ b_stride_w = b_stride_z;
+ }
const int c_stride_z = N * M;
const int c_stride_w = N * M * D;
-#if defined(_OPENMP) && !( defined(__arm__) && defined(__ANDROID__))
+#if defined(_OPENMP) && !(defined(__arm__) && defined(__ANDROID__))
#pragma omp parallel for collapse(2)
#endif /* _OPENMP */
- for(int w = 0; w < W; ++w)
+ for (int w = 0; w < W; ++w)
{
- for(int depth = 0; depth < D; ++depth)
+ for (int depth = 0; depth < D; ++depth)
{
const int base_addr_a = depth * a_stride_z + w * a_stride_w;
const int base_addr_b = depth * b_stride_z + w * b_stride_w;
const int base_addr_c = depth * c_stride_z + w * c_stride_w;
- for(int row = 0; row < M; ++row)
+ for (int row = 0; row < M; ++row)
{
- for(int col = 0; col < N; ++col)
+ for (int col = 0; col < N; ++col)
{
T acc(0);
- for(int k = 0; k < K; ++k)
+ for (int k = 0; k < K; ++k)
{
acc += a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N];
}
@@ -89,11 +108,12 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S
}
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
+SimpleTensor<T> gemm_mixed_precision(
+ const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
{
// GEMM mixed-precision combines F32 accumulators with F16 multiplications
// Create reference
- SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 };
+ SimpleTensor<T> dst{c.shape(), c.data_type(), 1};
// Compute reference
const int M = a.shape().y();
@@ -105,36 +125,54 @@ SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTenso
const int a_stride_z = K * M;
const int a_stride_w = K * M * D;
- const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions
- const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
+ const int b_stride_z =
+ b.shape().num_dimensions() > 2
+ ? N * K
+ : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions
+ int b_stride_w =
+ b.shape().num_dimensions() > 3
+ ? K * N * D
+ : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
+
+ // Note: There are 3 gemm types: batched-gemm, multi-gemm, and batched of multi-gemms. The third dimension of tensor b is overloaded when tensor b has exactly 3 dimensions:
+ // it can be either number of batches or multis. Batched-GEMM computation is detected only when the third dimension of "a" and "c" tensors is 1 and the number of dimensions is 4
+ const bool is_batched_gemm = b.shape().num_dimensions() == 3 && a.shape().num_dimensions() == 4 &&
+ c.shape().num_dimensions() == 4 && a.shape()[2] == 1 && c.shape()[2] == 1;
+
+ // Batched-GEMM
+ if (is_batched_gemm)
+ {
+ b_stride_w = b_stride_z;
+ }
const int c_stride_z = N * M;
const int c_stride_w = N * M * D;
-#if defined(_OPENMP) && !( defined(__arm__) && defined(__ANDROID__))
+#if defined(_OPENMP) && !(defined(__arm__) && defined(__ANDROID__))
#pragma omp parallel for collapse(2)
#endif /* _OPENMP */
- for(int w = 0; w < W; ++w)
+ for (int w = 0; w < W; ++w)
{
- for(int depth = 0; depth < D; ++depth)
+ for (int depth = 0; depth < D; ++depth)
{
const int base_addr_a = depth * a_stride_z + w * a_stride_w;
const int base_addr_b = depth * b_stride_z + w * b_stride_w;
const int base_addr_c = depth * c_stride_z + w * c_stride_w;
- for(int row = 0; row < M; ++row)
+ for (int row = 0; row < M; ++row)
{
- for(int col = 0; col < N; ++col)
+ for (int col = 0; col < N; ++col)
{
float acc(0);
- for(int k = 0; k < K; ++k)
+ for (int k = 0; k < K; ++k)
{
acc += static_cast<float>(a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N]);
}
// Finalize the result: alpha * A * B + beta * C
- dst[base_addr_c + col + row * N] = static_cast<T>(alpha * acc + beta * c[base_addr_c + col + row * N]);
+ dst[base_addr_c + col + row * N] =
+ static_cast<T>(alpha * acc + beta * c[base_addr_c + col + row * N]);
}
}
}
@@ -143,8 +181,21 @@ SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTenso
return dst;
}
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
+void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst)
+{
+ // Compute reference
+ SimpleTensor<T> dst_gemm = gemm(a, b, c, alpha, beta);
+ reference::arithmetic_operation<T>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE);
+}
+
+template SimpleTensor<bfloat16> gemm(const SimpleTensor<bfloat16> &a, const SimpleTensor<bfloat16> &b, const SimpleTensor<bfloat16> &c, float alpha, float beta);
template SimpleTensor<float> gemm(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta);
template SimpleTensor<half> gemm(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta);
+
+template void gemm_accumulate(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta, SimpleTensor<float> &dst);
+template void gemm_accumulate(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta, SimpleTensor<half> &dst);
+
template SimpleTensor<half> gemm_mixed_precision(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta);
} // namespace reference
} // namespace validation
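
Note: the is_batched_gemm branch above covers the case where tensor b carries its batch count in its third dimension while a and c carry theirs in the fourth (with their own third dimension equal to 1); reusing b_stride_z as b_stride_w then makes each batch of a multiply the matching slice of b. A hypothetical shape set that takes this path: a = (K, M, 1, W), b = (N, K, W), c = dst = (N, M, 1, W); without the override, b_stride_w would stay 0 and every batch would read the first slice of b.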
diff --git a/tests/validation/reference/GEMM.h b/tests/validation/reference/GEMM.h
index 5feaeda584..1b97570122 100644
--- a/tests/validation/reference/GEMM.h
+++ b/tests/validation/reference/GEMM.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2019, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_GEMM_H
-#define ARM_COMPUTE_TEST_GEMM_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMM_H
+#define ACL_TESTS_VALIDATION_REFERENCE_GEMM_H
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -41,8 +41,11 @@ SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const S
template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
SimpleTensor<T> gemm_mixed_precision(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta);
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
+void gemm_accumulate(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta, SimpleTensor<T> &dst);
+
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMM_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMM_H
diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp
index 1615b51e73..30c577d850 100644
--- a/tests/validation/reference/GEMMLowp.cpp
+++ b/tests/validation/reference/GEMMLowp.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#include "GEMMLowp.h"
#include "arm_compute/core/Types.h"
+#include "tests/validation/reference/ArithmeticOperations.h"
#include "tests/validation/reference/UtilsQuantizedAsymm.h"
#include "support/ToolchainSupport.h"
@@ -230,6 +231,13 @@ SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, c
return c;
}
+template <typename T_out, typename T_in, typename T_in_1>
+void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T_in> &a, const SimpleTensor<T_in_1> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T_out> &dst)
+{
+ SimpleTensor<T_out> dst_gemm = gemmlowp_matrix_multiply_core<T_out, T_in, T_in_1>(a, b, shape_c, a_offset, b_offset);
+ reference::arithmetic_operation<T_out>(reference::ArithmeticOperation::ADD, dst, dst_gemm, dst, ConvertPolicy::SATURATE);
+}
+
// used to validate assembly kernels which don't know anything about offsets
template <typename T1, typename T2, typename T3>
SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c)
@@ -336,6 +344,8 @@ template SimpleTensor<int8_t> gemmlowp_quantize_down_scale(const SimpleTensor<in
std::vector<int32_t> result_shift, int32_t min, int32_t max);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
+template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst);
+template void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<int32_t> &dst);
template SimpleTensor<int32_t> gemmlowp<int32_t, int8_t, int8_t>(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, uint8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, TensorShape shape_c);
template SimpleTensor<int32_t> gemmlowp<int32_t, uint8_t, int8_t>(const SimpleTensor<uint8_t> &a, const SimpleTensor<int8_t> &b, TensorShape shape_c);
diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h
index 99015d71fb..6e471fdad1 100644
--- a/tests/validation/reference/GEMMLowp.h
+++ b/tests/validation/reference/GEMMLowp.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_GEMMLOWP_H
-#define ARM_COMPUTE_TEST_GEMMLOWP_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H
+#define ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -38,6 +38,9 @@ namespace reference
template <typename T1, typename T2, typename T3>
SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset);
+template <typename T1, typename T2, typename T3>
+void gemmlowp_matrix_multiply_core_accumulate(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c, int32_t a_offset, int32_t b_offset, SimpleTensor<T1> &dst_);
+
template <typename T1, typename T2, typename T3 = T2>
SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T3> &b, TensorShape shape_c);
@@ -71,4 +74,4 @@ SimpleTensor<TOut> gemmlowp_quantize_down_scale_by_float(const SimpleTensor<TIn>
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMMLOWP_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_GEMMLOWP_H
diff --git a/tests/validation/reference/Gather.cpp b/tests/validation/reference/Gather.cpp
index 93ac09cf95..c90c04f8cc 100644
--- a/tests/validation/reference/Gather.cpp
+++ b/tests/validation/reference/Gather.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,27 +39,56 @@ namespace reference
template <typename T>
SimpleTensor<T> gather(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &indices, uint32_t actual_axis)
{
- const auto *indices_ptr = static_cast<const uint32_t *>(indices.data());
const TensorShape dst_shape = arm_compute::misc::shape_calculator::compute_gather_shape(src.shape(), indices.shape(), actual_axis);
SimpleTensor<T> dst(dst_shape, src.data_type());
+ const auto src_ptr = static_cast<const T *>(src.data());
+ const auto indices_ptr = static_cast<const uint32_t *>(indices.data());
+ const auto dst_ptr = static_cast<T *>(dst.data());
+
+ const uint32_t index_limit = src.shape()[actual_axis];
+
Window win;
win.use_tensor_dimensions(dst_shape);
- execute_window_loop(win, [&](const Coordinates & id)
- {
- Coordinates offset;
- for(unsigned int dim = 0; dim < id.num_dimensions(); ++dim)
+
+ execute_window_loop(win, [&](const Coordinates &dst_coords) {
+ const auto dst_addr = coords2index(dst.shape(), dst_coords);
+
+ // Calculate the coordinates of the index value.
+ Coordinates idx_coords;
+
+ for(size_t i = 0; i < indices.shape().num_dimensions(); ++i)
{
- if(dim == actual_axis)
+ idx_coords.set(i, dst_coords[i + actual_axis]);
+ }
+
+ const auto index = indices_ptr[coords2index(indices.shape(), idx_coords)];
+
+ if(index < index_limit)
+ {
+ // Calculate the coordinates of the source data.
+ Coordinates src_coords;
+
+ for(size_t i = 0; i < actual_axis; ++i)
{
- offset.set(dim, indices_ptr[id[dim]]);
+ src_coords.set(i, dst_coords[i]);
}
- else
+
+ src_coords.set(actual_axis, index);
+
+ for(size_t i = actual_axis + 1; i < src.shape().num_dimensions(); ++i)
{
- offset.set(dim, id[dim]);
+ src_coords.set(i, dst_coords[i + indices.shape().num_dimensions() - 1]);
}
+
+ // Copy the data.
+ const auto src_addr = coords2index(src.shape(), src_coords);
+ dst_ptr[dst_addr] = src_ptr[src_addr];
+ }
+ else
+ {
+ dst_ptr[dst_addr] = 0;
}
- *reinterpret_cast<T *>(dst(id)) = *reinterpret_cast<const T *>(src(offset));
});
return dst;
@@ -72,4 +101,4 @@ template SimpleTensor<uint8_t> gather(const SimpleTensor<uint8_t> &src, const Si
} // namespace reference
} // namespace validation
} // namespace test
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
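As a quick illustration of the rewritten indexing (a hypothetical sketch; shapes and index values are made up for this note, assuming the usual tests/ headers and the arm_compute::test::validation namespace are in scope):

// Gather along axis 1 of a (x = 3, y = 4) tensor with a 1-D index tensor {3, 1}:
// the destination shape becomes (3, 2), and with the new bounds check any index >= 4
// now produces zeros instead of reading out of range.
SimpleTensor<float>    src{ TensorShape(3U, 4U), DataType::F32 };
SimpleTensor<uint32_t> idx{ TensorShape(2U), DataType::U32 };
idx[0] = 3;
idx[1] = 1;
SimpleTensor<float> dst = reference::gather(src, idx, /* actual_axis */ 1);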
diff --git a/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp b/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp
new file mode 100644
index 0000000000..7500560c91
--- /dev/null
+++ b/tests/validation/reference/IndirectConv2dAddressPrecalculation.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "IndirectConv2dAddressPrecalculation.h"
+
+#include "arm_compute/core/Types.h"
+
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+SimpleTensor<int32_t> indirect_conv2d_addr_precalculation(const TensorShape &shape_conv_src, const TensorShape &shape_conv_wei, const TensorShape &shape_conv_dst, const TensorShape &shape_dst,
+ const PadStrideInfo &conv_info)
+{
+ SimpleTensor<int32_t> out{ shape_dst, DataType::S32 };
+
+ constexpr unsigned int width_idx = 1;
+ constexpr unsigned int heigh_idx = 2;
+
+ const int src_conv_width = static_cast<int32_t>(shape_conv_src[width_idx]); // NHWC
+ const int src_conv_height = static_cast<int32_t>(shape_conv_src[heigh_idx]); // NHWC
+ const int dst_conv_width = static_cast<int32_t>(shape_conv_dst[width_idx]); // NHWC
+ const int wei_conv_width = static_cast<int32_t>(shape_conv_wei[width_idx]); // NHWC
+ const int wei_conv_height = static_cast<int32_t>(shape_conv_wei[heigh_idx]); // NHWC
+ const int dst_width = static_cast<int32_t>(shape_dst[0]);
+ const int dst_height = static_cast<int32_t>(shape_dst[1]);
+ const int dst_batch = static_cast<int32_t>(shape_dst[2]);
+ const int ks = wei_conv_width * wei_conv_height;
+ const int stride_x = static_cast<int32_t>(conv_info.stride().first);
+ const int stride_y = static_cast<int32_t>(conv_info.stride().second);
+ const int pad_left = static_cast<int32_t>(conv_info.pad_left());
+ const int pad_top = static_cast<int32_t>(conv_info.pad_top());
+
+ const int m0 = dst_width / ks;
+
+ for(int z = 0; z < dst_batch; ++z)
+ {
+ for(int y = 0; y < dst_height; ++y)
+ {
+ const int mout = y * m0;
+ for(int ki = 0; ki < ks; ++ki)
+ {
+ const int xk = ki % wei_conv_width;
+ const int yk = ki / wei_conv_width;
+ for(int mi = 0; mi < m0; ++mi)
+ {
+ int xi = ((mout + mi) % dst_conv_width) * stride_x;
+ int yi = ((mout + mi) / dst_conv_width) * stride_y;
+ xi -= pad_left;
+ yi -= pad_top;
+ const int x_s = xi + xk;
+ const int y_s = yi + yk;
+ int my = x_s + y_s * src_conv_width;
+ my = my + z * src_conv_width * src_conv_height;
+ my = x_s >= 0 ? my : -1;
+ my = x_s < src_conv_width ? my : -1;
+ my = y_s >= 0 ? my : -1;
+ my = y_s < src_conv_height ? my : -1;
+
+ const unsigned int addr_out = mi + ki * m0 + y * (dst_width) + z * (dst_width * dst_height);
+ out[addr_out] = my;
+ }
+ }
+ }
+ }
+
+ return out;
+}
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute \ No newline at end of file
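To see what the precalculated table holds, take a hypothetical 4x4 NHWC source, 3x3 weights, stride 1 and pad 1 (numbers chosen only for illustration, so the conv output is also 4x4). For output pixel 5 (x = 1, y = 1) and kernel tap ki = 0 (xk = yk = 0), the code computes xi = yi = 1 - 1 = 0, hence x_s = y_s = 0 and my = 0 + 0 * 4 = 0, the offset of the top-left source element. For output pixel 0 with the same tap, xi = yi = -1 lands in the padded border, so my is forced to -1, marking that tap as padding.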
diff --git a/tests/validation/reference/IndirectConv2dAddressPrecalculation.h b/tests/validation/reference/IndirectConv2dAddressPrecalculation.h
new file mode 100644
index 0000000000..f4a90dfd9f
--- /dev/null
+++ b/tests/validation/reference/IndirectConv2dAddressPrecalculation.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H
+#define ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+SimpleTensor<int32_t> indirect_conv2d_addr_precalculation(const TensorShape &shape_conv_src, const TensorShape &shape_conv_wei, const TensorShape &shape_conv_out, const TensorShape &shape_out,
+ const PadStrideInfo &conv_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_INDIRECT_CONV2D_ADDRESS_PRECALCULATION_H */ \ No newline at end of file
diff --git a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp
index 0a23fa19bb..a7c8a784d9 100644
--- a/tests/validation/reference/MeanStdDevNormalizationLayer.cpp
+++ b/tests/validation/reference/MeanStdDevNormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -63,6 +63,15 @@ SimpleTensor<T> mean_std_normalization_layer(const SimpleTensor<T> &src, float e
return dst;
}
+template <>
+SimpleTensor<uint8_t> mean_std_normalization_layer(const SimpleTensor<uint8_t> &src, float epsilon)
+{
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp = mean_std_normalization_layer<float>(src_tmp, epsilon);
+ SimpleTensor<uint8_t> dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info());
+ return dst;
+}
+
template SimpleTensor<float> mean_std_normalization_layer(const SimpleTensor<float> &src, float epsilon);
template SimpleTensor<half> mean_std_normalization_layer(const SimpleTensor<half> &src, float epsilon);
} // namespace reference
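The new specialization simply round-trips through float; a minimal sketch of how a quantized input is exercised (shapes and quantization parameters are illustrative, assuming the usual tests/ headers and namespaces):

SimpleTensor<uint8_t> src{ TensorShape(8U, 2U), DataType::QASYMM8, 1, QuantizationInfo(0.5f, 10) };
// Each row is normalised in float as (x - mean(row)) / sqrt(var(row) + epsilon),
// then re-quantized with the source quantization info.
SimpleTensor<uint8_t> dst = reference::mean_std_normalization_layer(src, 1e-8f);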
diff --git a/tests/validation/reference/Permute.cpp b/tests/validation/reference/Permute.cpp
index 6f122b1bf5..7aa3011d8f 100644
--- a/tests/validation/reference/Permute.cpp
+++ b/tests/validation/reference/Permute.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2019,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#include "Permute.h"
#include "arm_compute/core/Types.h"
+
#include "tests/validation/Helpers.h"
namespace arm_compute
@@ -42,11 +43,11 @@ SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm)
permute(dst_shape, perm);
// Create reference
- SimpleTensor<T> dst{ dst_shape, src.data_type(), src.num_channels(), src.quantization_info() };
+ SimpleTensor<T> dst{dst_shape, src.data_type(), src.num_channels(), src.quantization_info()};
// Compute reference
const uint32_t num_elements = src.num_elements();
- for(uint32_t i = 0; i < num_elements; ++i)
+ for (uint32_t i = 0; i < num_elements; ++i)
{
const Coordinates src_coords = index2coord(src.shape(), i);
Coordinates dst_coords = src_coords;
@@ -58,13 +59,14 @@ SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm)
return dst;
}
-template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, PermutationVector perm);
-template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm);
-template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm);
+template SimpleTensor<int8_t> permute(const SimpleTensor<int8_t> &src, PermutationVector perm);
+template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm);
+template SimpleTensor<int16_t> permute(const SimpleTensor<int16_t> &src, PermutationVector perm);
template SimpleTensor<uint16_t> permute(const SimpleTensor<uint16_t> &src, PermutationVector perm);
template SimpleTensor<uint32_t> permute(const SimpleTensor<uint32_t> &src, PermutationVector perm);
-template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm);
-template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm);
+template SimpleTensor<float> permute(const SimpleTensor<float> &src, PermutationVector perm);
+template SimpleTensor<half> permute(const SimpleTensor<half> &src, PermutationVector perm);
+template SimpleTensor<bfloat16> permute(const SimpleTensor<bfloat16> &src, PermutationVector perm);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/Pooling3dLayer.cpp b/tests/validation/reference/Pooling3dLayer.cpp
new file mode 100644
index 0000000000..2e8f3a0b92
--- /dev/null
+++ b/tests/validation/reference/Pooling3dLayer.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Pooling3dLayer.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+using namespace arm_compute::misc::shape_calculator;
+
+template <typename T>
+SimpleTensor<T> pooling_3d_layer_internal(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, SimpleTensor<uint32_t> *indices)
+{
+ TensorShape pooled_shape = compute_pool3d_shape(src.shape(), pool3d_info);
+ SimpleTensor<T> dst{ pooled_shape, src.data_type(), 1 };
+
+ if(indices != nullptr)
+ {
+ *indices = SimpleTensor<uint32_t> { pooled_shape, DataType::U32, 1 };
+ }
+
+ const int idx_channel = 0;
+ const int idx_width = 1;
+ const int idx_height = 2;
+ const int idx_depth = 3;
+ const int idx_batch = 4;
+
+ const int pool_size_width = pool3d_info.is_global_pooling ? src.shape()[idx_width] : pool3d_info.pool_size.width;
+ const int pool_size_height = pool3d_info.is_global_pooling ? src.shape()[idx_height] : pool3d_info.pool_size.height;
+ const int pool_size_depth = pool3d_info.is_global_pooling ? src.shape()[idx_depth] : pool3d_info.pool_size.depth;
+
+ const int pool_stride_width = static_cast<int>(pool3d_info.stride.width);
+ const int pool_stride_height = static_cast<int>(pool3d_info.stride.height);
+ const int pool_stride_depth = static_cast<int>(pool3d_info.stride.depth);
+
+ const int pad_left = static_cast<int>(pool3d_info.padding.left);
+ const int pad_top = static_cast<int>(pool3d_info.padding.top);
+ const int pad_front = static_cast<int>(pool3d_info.padding.front);
+
+ const int pad_right = static_cast<int>(pool3d_info.padding.right);
+ const int pad_bottom = static_cast<int>(pool3d_info.padding.bottom);
+ const int pad_back = static_cast<int>(pool3d_info.padding.back);
+
+ const int num_channels = static_cast<int>(src.shape()[idx_channel]);
+ const int num_batches = static_cast<int>(src.shape()[idx_batch]);
+
+ ARM_COMPUTE_ERROR_ON(num_channels != static_cast<int>(dst.shape()[idx_channel]));
+ ARM_COMPUTE_ERROR_ON(num_batches != static_cast<int>(dst.shape()[idx_batch]));
+
+ const int w_src = static_cast<int>(src.shape()[idx_width]);
+ const int h_src = static_cast<int>(src.shape()[idx_height]);
+ const int d_src = static_cast<int>(src.shape()[idx_depth]);
+ const int w_dst = static_cast<int>(dst.shape()[idx_width]);
+ const int h_dst = static_cast<int>(dst.shape()[idx_height]);
+ const int d_dst = static_cast<int>(dst.shape()[idx_depth]);
+
+ const bool exclude_padding = pool3d_info.exclude_padding;
+
+ const int height_stride_src = num_channels * w_src;
+ const int depth_stride_src = height_stride_src * h_src;
+ const int batch_stride_src = depth_stride_src * d_src;
+ const int height_stride_dst = num_channels * w_dst;
+ const int depth_stride_dst = height_stride_dst * h_dst;
+ const int batch_stride_dst = depth_stride_dst * d_dst;
+
+ for(int b = 0; b < num_batches; ++b)
+ {
+ const int batch_offset_dst = b * batch_stride_dst;
+ const int batch_offset_src = b * batch_stride_src;
+ for(int c = 0; c < num_channels; ++c)
+ {
+ for(int d = 0; d < d_dst; ++d)
+ {
+ const int depth_offset_dst = d * depth_stride_dst;
+ for(int h = 0; h < h_dst; ++h)
+ {
+ const int height_offset_dst = h * height_stride_dst;
+ for(int w = 0; w < w_dst; ++w)
+ {
+ int wstart = w * pool_stride_width - pad_left;
+ int hstart = h * pool_stride_height - pad_top;
+ int dstart = d * pool_stride_depth - pad_front;
+ int wend = std::min(wstart + pool_size_width, w_src + pad_right);
+ int hend = std::min(hstart + pool_size_height, h_src + pad_bottom);
+ int dend = std::min(dstart + pool_size_depth, d_src + pad_back);
+
+ // this may not be equal to pool_w * pool_h * pool_d because of
+ // DimensionRoundingType choice (CEIL)
+ int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
+
+ // limit [start, end) to [0, w_src)
+ wstart = std::max(wstart, 0);
+ hstart = std::max(hstart, 0);
+ dstart = std::max(dstart, 0);
+ wend = std::min(wend, w_src);
+ hend = std::min(hend, h_src);
+ dend = std::min(dend, d_src);
+
+ auto max_val = -std::numeric_limits<T>::infinity();
+ int max_index{ 0 };
+ T avg_val = static_cast<T>(0.f);
+ T l2_val = static_cast<T>(0.f);
+
+ if(exclude_padding)
+ {
+ pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
+ }
+
+ for(int z = dstart; z < dend; ++z)
+ {
+ const int depth_offset_src = z * depth_stride_src;
+ for(int y = hstart; y < hend; ++y)
+ {
+ const int height_offset_src = y * height_stride_src;
+ for(int x = wstart; x < wend; ++x)
+ {
+ const auto val = static_cast<T>(
+ src[batch_offset_src + depth_offset_src + height_offset_src + x * num_channels + c]);
+ if(val > max_val)
+ {
+ max_val = val;
+ max_index = coord2index(src.shape(), Coordinates(c, x, y, z, 0));
+ }
+
+ avg_val += val;
+ l2_val += val * val;
+ }
+ }
+ }
+
+ avg_val /= pool_size;
+ l2_val = static_cast<T>(std::sqrt(l2_val / pool_size));
+
+ int dst_index = batch_offset_dst + depth_offset_dst + height_offset_dst + w * num_channels + c;
+ switch(pool3d_info.pool_type)
+ {
+ case PoolingType::MAX:
+ dst[dst_index] = static_cast<T>(max_val);
+ break;
+ case PoolingType::AVG:
+ dst[dst_index] = static_cast<T>(avg_val);
+ break;
+ case PoolingType::L2:
+ dst[dst_index] = static_cast<T>(l2_val);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Pooling Type should be either MAX, AVG or L2");
+ }
+
+ if(indices != nullptr)
+ {
+ (*indices)[dst_index] = max_index;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return dst;
+}
+
+template SimpleTensor<float> pooling_3d_layer(const SimpleTensor<float> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices);
+template SimpleTensor<half> pooling_3d_layer(const SimpleTensor<half> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices);
+
+template <typename T>
+SimpleTensor<T> pooling_3d_layer(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
+{
+ ARM_COMPUTE_UNUSED(output_qinfo);
+ return pooling_3d_layer_internal<T>(src, pool3d_info, indices);
+}
+
+template <>
+SimpleTensor<int8_t> pooling_3d_layer<int8_t>(const SimpleTensor<int8_t> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
+{
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp = pooling_3d_layer_internal<float>(src_tmp, pool3d_info, indices);
+ return convert_to_asymmetric<int8_t>(dst_tmp, output_qinfo);
+}
+
+template <>
+SimpleTensor<uint8_t> pooling_3d_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
+{
+ SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
+ SimpleTensor<float> dst_tmp = pooling_3d_layer_internal<float>(src_tmp, pool3d_info, indices);
+ return convert_to_asymmetric<uint8_t>(dst_tmp, output_qinfo);
+}
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
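A minimal sketch of driving the new reference (shapes and pooling configuration are assumptions for illustration; the tensor is laid out as (C, W, H, D, N), matching the index constants above, and the usual tests/ headers and namespaces are assumed):

SimpleTensor<float> src{ TensorShape(3U, 8U, 8U, 8U, 2U), DataType::F32, 1 };
Pooling3dLayerInfo  info{};
info.pool_type = PoolingType::MAX;
info.pool_size = Size3D(2, 2, 2);
info.stride    = Size3D(2, 2, 2);
SimpleTensor<uint32_t> max_indices;
SimpleTensor<float>    dst = reference::pooling_3d_layer(src, info, QuantizationInfo(), &max_indices);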
diff --git a/tests/validation/reference/Pooling3dLayer.h b/tests/validation/reference/Pooling3dLayer.h
new file mode 100644
index 0000000000..481a0d3024
--- /dev/null
+++ b/tests/validation/reference/Pooling3dLayer.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_POOL3D_LAYER_H
+#define ARM_COMPUTE_TEST_POOL3D_LAYER_H
+
+#include "Utils.h"
+#include "arm_compute/core/Types.h"
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> pooling_3d_layer_internal(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, SimpleTensor<uint32_t> *indices = nullptr);
+
+template <typename T>
+SimpleTensor<T> pooling_3d_layer(const SimpleTensor<T> &src, const Pooling3dLayerInfo &pool3d_info, const QuantizationInfo &output_qinfo = QuantizationInfo(),
+ SimpleTensor<uint32_t> *indices = nullptr);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_POOL3D_LAYER_H */
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index 5f4edfe49c..bf7bd0c1df 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,6 @@ using namespace arm_compute::misc::shape_calculator;
template <typename T, typename ACC_T, typename std::enable_if<is_floating_point<T>::value, int>::type>
SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const PoolingLayerInfo &info, SimpleTensor<uint32_t> *indices, DataLayout data_layout)
{
- ARM_COMPUTE_ERROR_ON(info.is_global_pooling && (src.shape().x() != src.shape().y()));
// Create reference
SimpleTensor<T> dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info), src.data_type(), 1 };
auto pooled_shape = compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info);
@@ -84,20 +83,28 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
{
int wstart = w * pool_stride_x - pad_left;
int hstart = h * pool_stride_y - pad_top;
+
+ // Used to calculate kernel indices
+ int kh_start = std::max(0, -hstart);
+ int kw_start = std::max(0, -wstart);
+ int max_ker_index{ 0 };
+
int wend = std::min(wstart + pool_size_x, w_src);
int hend = std::min(hstart + pool_size_y, h_src);
wstart = std::max(wstart, 0);
hstart = std::max(hstart, 0);
- auto max_val = std::numeric_limits<ACC_T>::lowest();
+ auto max_val = info.use_inf_as_limit ? -std::numeric_limits<ACC_T>::infinity() : std::numeric_limits<ACC_T>::lowest();
int max_index{ 0 };
- for(int y = hstart; y < hend; ++y)
+
+ for(int y = hstart, kh = kh_start; y < hend; ++y, ++kh)
{
- for(int x = wstart; x < wend; ++x)
+ for(int x = wstart, kw = kw_start; x < wend; ++x, ++kw)
{
const auto val = static_cast<ACC_T>(src[b * z_src * h_src * w_src + r * h_src * w_src + y * w_src + x]);
if(val > max_val)
{
- max_val = val;
+ max_val = val;
+ max_ker_index = pool_size_x * (kh) + (kw);
if(data_layout == DataLayout::NCHW)
{
max_index = coord2index(src.shape(), Coordinates(x, y, r, 0));
@@ -113,7 +120,7 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
dst[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = static_cast<T>(max_val);
if(indices)
{
- (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = max_index;
+ (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = (info.use_kernel_indices) ? max_ker_index : max_index;
}
}
}
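To make the new kernel-index bookkeeping concrete, consider a hypothetical 3x3 pool with pad_left = pad_top = 1 at output position (w = 0, h = 0): wstart = hstart = -1, so kh_start = kw_start = 1. If the maximum sits at source element (x = 1, y = 0), the loops reach it with kh = 1 and kw = 2, giving max_ker_index = pool_size_x * kh + kw = 3 * 1 + 2 = 5, i.e. the position of the winning tap inside the 3x3 window, which is what gets written when info.use_kernel_indices is set.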
diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp
index 27665375c3..ad7ba7ac43 100644
--- a/tests/validation/reference/QuantizationLayer.cpp
+++ b/tests/validation/reference/QuantizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/tests/validation/reference/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
index ffb79f86c5..c189bc2d47 100644
--- a/tests/validation/reference/ReductionOperation.cpp
+++ b/tests/validation/reference/ReductionOperation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,6 @@
* SOFTWARE.
*/
#include "ReductionOperation.h"
-
#include "tests/validation/Helpers.h"
#include <algorithm>
@@ -39,7 +38,7 @@ namespace reference
namespace
{
template <typename T, typename OT>
-OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride)
+OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride, RoundingPolicy policy)
{
using type = typename std::remove_cv<OT>::type;
T res;
@@ -99,7 +98,14 @@ OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, in
}
if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0)
{
- int_res /= reduce_elements;
+ // Only use rounding on aarch64 to be consistent with the kernel
+#ifdef __aarch64__
+ // Divide in float, round to nearest, then implicitly cast back to int
+ int_res = round(static_cast<float>(int_res) / static_cast<float>(reduce_elements), policy);
+#else // defined(__aarch64__)
+ ARM_COMPUTE_UNUSED(policy);
+ int_res /= reduce_elements; // Legacy compatibility
+#endif // defined(__aarch64__)
}
res = static_cast<type>(int_res);
}
@@ -175,12 +181,12 @@ OT reduce_operation_arg_min_max(const T *ptr, int reduce_elements, ReductionOper
} // namespace
template <typename T, typename OT>
-SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op)
+SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+ DataType output_type, RoundingPolicy policy)
{
// Create reference
- const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
- DataType output_data_type = is_arg_min_max ? DataType::S32 : src.data_type();
- SimpleTensor<OT> dst{ dst_shape, output_data_type, 1, src.quantization_info() };
+ const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
+ SimpleTensor<OT> dst{ dst_shape, output_type, 1, src.quantization_info() };
const unsigned int src_width = src.shape().x();
const unsigned int src_height = src.shape().y();
const unsigned int src_depth = src.shape().z();
@@ -197,7 +203,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + du * reduce_elems;
dst[du] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, 1) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1, policy);
}
}
break;
@@ -213,7 +219,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + in_offset;
dst[out_offset] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width, policy);
}
}
}
@@ -232,7 +238,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + in_offset;
dst[out_offset] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height, policy);
}
}
}
@@ -254,7 +260,7 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
const T *src_row_ptr = src.data() + in_offset;
dst[out_offset] = is_arg_min_max ?
reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth) :
- reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth);
+ reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth, policy);
}
}
}
@@ -269,74 +275,89 @@ SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const T
}
template <typename T, typename OT>
-SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
+SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+ DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
ARM_COMPUTE_UNUSED(quantization_info_output);
- return compute_reduction_operation<T, OT>(src, dst_shape, axis, op);
+ return compute_reduction_operation<T, OT>(src, dst_shape, axis, op, output_type, policy);
}
template <>
-SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
+SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+ DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
if(src.data_type() == DataType::QASYMM8)
{
// If the operation is MEAN_SUM, we can directly use the uint8 implementation without taking into account scale and offset
if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
{
- return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, output_type, policy);
}
else
{
SimpleTensor<float> src_f = convert_from_asymmetric(src);
- SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
+ SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op, output_type);
return convert_to_asymmetric<uint8_t>(dst_f, quantization_info_output);
}
}
else
{
- return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, output_type, policy);
}
}
template <>
-SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output)
+SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis,
+ ReductionOperation op, DataType output_type, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
if(src.data_type() == DataType::QASYMM8_SIGNED)
{
// If the operation is MEAN_SUM, we can directly use the int8 implementation without taking into account scale and offset
if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
{
- return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, output_type, policy);
}
else
{
SimpleTensor<float> src_f = convert_from_asymmetric(src);
- SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
+ SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op, output_type);
return convert_to_asymmetric<int8_t>(dst_f, quantization_info_output);
}
}
else
{
- return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op);
+ return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, output_type, policy);
}
}
template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32, QuantizationInfo quantization_info_output = QuantizationInfo(),
+ RoundingPolicy policy = RoundingPolicy::TO_ZERO);
+
template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
+
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+ DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
+template SimpleTensor<int64_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
+ DataType output_type = DataType::S32, QuantizationInfo quantization_info_output = QuantizationInfo(),
+ RoundingPolicy policy = RoundingPolicy::TO_ZERO);
} // namespace reference
} // namespace validation
} // namespace test
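As a concrete (hypothetical) example of the behavioural change: with int_res = 7 accumulated over reduce_elements = 2 and RoundingPolicy::TO_NEAREST_UP requested, the aarch64 path now yields round(7.0f / 2.0f) = 4, matching the kernel, while other targets keep the legacy truncating division 7 / 2 = 3.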
diff --git a/tests/validation/reference/ReductionOperation.h b/tests/validation/reference/ReductionOperation.h
index 9c9e721b29..fb2e7a7093 100644
--- a/tests/validation/reference/ReductionOperation.h
+++ b/tests/validation/reference/ReductionOperation.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_TEST_REDUCTION_OPERATION_H
#define ARM_COMPUTE_TEST_REDUCTION_OPERATION_H
+#include "arm_compute/core/Rounding.h"
#include "tests/SimpleTensor.h"
#include "tests/validation/Helpers.h"
@@ -36,8 +37,8 @@ namespace validation
namespace reference
{
template <typename T, typename OT>
-SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
- QuantizationInfo quantization_info_output = QuantizationInfo());
+SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, DataType output_type = DataType::S32,
+ QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/Remap.cpp b/tests/validation/reference/Remap.cpp
deleted file mode 100644
index 33c5a7de68..0000000000
--- a/tests/validation/reference/Remap.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "Remap.h"
-
-#include "Utils.h"
-#include "tests/validation/Helpers.h"
-
-#include <algorithm>
-#include <array>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-template <typename T>
-SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode,
- T constant_border_value)
-{
- ARM_COMPUTE_ERROR_ON_MSG(border_mode == BorderMode::REPLICATE, "BorderMode not supported");
- SimpleTensor<T> out(in.shape(), in.data_type());
- ARM_COMPUTE_ERROR_ON(out.num_elements() != map_x.num_elements());
- const int width = in.shape().x();
- const int height = in.shape().y();
- const uint32_t num_elements = out.num_elements();
- for(uint32_t idx = 0; idx < num_elements; idx++)
- {
- const Coordinates id_out = index2coord(out.shape(), idx);
- valid_mask[idx] = 1;
- Coordinates src_idx = id_out; // need to setup all coordinates and not just xy
- if((0 <= map_y[idx]) && (map_y[idx] < height) && (0 <= map_x[idx]) && (map_x[idx] < width))
- {
- switch(policy)
- {
- case InterpolationPolicy::NEAREST_NEIGHBOR:
- {
- src_idx.set(0, static_cast<int>(std::floor(map_x[idx])));
- src_idx.set(1, static_cast<int>(std::floor(map_y[idx])));
- out[idx] = in[coord2index(in.shape(), src_idx)];
- break;
- }
- case InterpolationPolicy::BILINEAR:
- {
- (valid_bilinear_policy(map_x[idx], map_y[idx], width, height, border_mode)) ?
- out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value) :
- valid_mask[idx] = 0;
- break;
- }
- case InterpolationPolicy::AREA:
- default:
- ARM_COMPUTE_ERROR("Interpolation not supported");
- break;
- }
- }
- else
- {
- if(border_mode == BorderMode::UNDEFINED)
- {
- valid_mask[idx] = 0;
- }
- else
- {
- switch(policy)
- {
- case InterpolationPolicy::NEAREST_NEIGHBOR:
- out[idx] = constant_border_value;
- break;
- case InterpolationPolicy::BILINEAR:
- out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value);
- break;
- case InterpolationPolicy::AREA:
- default:
- break;
- }
- }
- }
- }
-
- return out;
-}
-
-template SimpleTensor<uint8_t> remap(const SimpleTensor<uint8_t> &src, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<uint8_t> &valid_mask, InterpolationPolicy policy,
- BorderMode border_mode,
- uint8_t constant_border_value);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/reference/Reorder.cpp b/tests/validation/reference/Reorder.cpp
new file mode 100644
index 0000000000..8abb372596
--- /dev/null
+++ b/tests/validation/reference/Reorder.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Reorder.h"
+#include "src/core/NEON/kernels/arm_gemm/utils.hpp"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+
+/*
+ * Generic transform.
+ *
+ * Assuming the untransposed case, this works by first reading <BlockBy>
+ * consecutive values from the first input row. This same number of values
+ * is then read from the next <IntBy-1> rows. Now return to the first
+ * input row and repeat.
+ *
+ * Need to cope with the work requested in either dimension not actually
+ * being a multiple of the block sizes.
+ */
+template <unsigned int tIntBy, unsigned int BlockBy, bool Transposed, size_t TOutSize, size_t TInSize, typename d_type, arm_gemm::VLType vlt>
+struct Transform_ref
+{
+ template <typename TOut, typename TIn>
+ static void Transform(TOut &out, const TIn in, const int stride,
+ const int y0, const int ymax, const int x0, const int xmax)
+ {
+ // NOTE: This code is disabled to avoid the call to get_vector_length(), so templated transforms will not be
+ // correct for SVE. This is not an issue as we have specializations for all SVE cases.
+ // For SVE cases we multiply the interleave factor by the vector length.
+ // const unsigned int IntBy = tIntBy * (vlt == VLType::SVE ? get_vector_length<TOut>() / BlockBy : 1);
+ const unsigned int IntBy = tIntBy;
+ int out_index = 0;
+
+ const int n_whole_y_blocks = (ymax - y0) / IntBy;
+ const int y_remainders = (ymax - y0) % IntBy;
+ const int n_y_blocks = n_whole_y_blocks + (y_remainders ? 1 : 0);
+
+ const int n_whole_x_blocks = (xmax - x0) / BlockBy;
+ const int x_remainders = (xmax - x0) % BlockBy;
+ const int n_x_blocks = n_whole_x_blocks + (x_remainders ? 1 : 0);
+
+ // "Y" loop: advance down the rows of the source IntBy rows at a time.
+ // Set up fill_rows to show the number of rows to copy from, and blank_rows
+ // for the number of blank rows to add.
+ for(int y_block = 0; y_block < n_y_blocks; y_block++)
+ {
+ const int fill_rows = (y_block < n_whole_y_blocks) ? IntBy : y_remainders;
+ const int blank_rows = IntBy - fill_rows;
+
+ const int y_base = y0 + (y_block * IntBy);
+
+ // So now advance along this block of rows, BlockBy columns at a time.
+ for(int x_block = 0; x_block < n_x_blocks; x_block++)
+ {
+ const int fill_cols = (x_block < n_whole_x_blocks) ? BlockBy : x_remainders;
+ const int blank_cols = BlockBy - fill_cols;
+
+ const int x_base = x0 + (x_block * BlockBy);
+
+ for(int row = 0; row < fill_rows; row++)
+ {
+ for(int col = 0; col < fill_cols; col++)
+ {
+ // In-range copy. If it's transposed, we reverse the sense of rows and columns here.
+ if(Transposed)
+ {
+ out[out_index] = in[(x_base + col) * stride + y_base + row];
+ out_index++;
+ }
+ else
+ {
+ out[out_index] = in[(y_base + row) * stride + x_base + col];
+ out_index++;
+ }
+ }
+ // "col" tail - row is in range but column is out of range.
+ for(int col = 0; col < blank_cols; col++)
+ {
+ out[out_index] = 0;
+ out_index++;
+ }
+ }
+ // "row" tail - row is out of range so fill with zeros always.
+ const d_type zeroval = 0;
+ const int pads = blank_rows * (fill_cols + blank_cols);
+
+ for(int i = 0; i < pads; i++)
+ {
+ out[out_index] = zeroval;
+ }
+
+ out_index += pads;
+ }
+ }
+ }
+};
+
+template <typename T>
+SimpleTensor<T> reorder_layer(const SimpleTensor<T> &src, const TensorShape &output_shape, WeightFormat output_wf)
+{
+ SimpleTensor<T> dst{ output_shape, src.data_type() };
+ const int cols = src.shape()[0];
+ const int rows = src.shape()[1];
+
+ switch(output_wf)
+ {
+ case WeightFormat::OHWIo4:
+ {
+ Transform_ref<4, 1, true, sizeof(float), sizeof(float), float, arm_gemm::VLType::None>::Transform<SimpleTensor<T> &, SimpleTensor<T>>(dst, src, rows, 0, rows, 0, cols);
+ break;
+ }
+ case WeightFormat::OHWIo8:
+ {
+ Transform_ref<8, 1, true, sizeof(float), sizeof(float), float, arm_gemm::VLType::None>::Transform<SimpleTensor<T> &, SimpleTensor<T>>(dst, src, rows, 0, rows, 0, cols);
+ break;
+ }
+ default:
+ break;
+ }
+
+ return dst;
+}
+
+template SimpleTensor<float> reorder_layer(const SimpleTensor<float> &src, const TensorShape &output_shape, WeightFormat output_wf);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
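A minimal sketch of the new entry point (shapes are assumptions; note that the destination's second dimension has to account for the o4/o8 blocking, i.e. be rounded up to the interleave factor, since the row tail is zero-filled):

SimpleTensor<float> src{ TensorShape(8U, 10U), DataType::F32 };   // (cols, rows)
SimpleTensor<float> dst = reference::reorder_layer(src, TensorShape(8U, 12U), WeightFormat::OHWIo4);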
diff --git a/tests/validation/reference/Remap.h b/tests/validation/reference/Reorder.h
index 0726f75965..94ee5078f8 100644
--- a/tests/validation/reference/Remap.h
+++ b/tests/validation/reference/Reorder.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,10 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_REMAP_H
-#define ARM_COMPUTE_TEST_REMAP_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_REORDER
+#define ACL_TESTS_VALIDATION_REFERENCE_REORDER
#include "tests/SimpleTensor.h"
+#include "tests/Types.h"
namespace arm_compute
{
@@ -35,10 +36,9 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode,
- T constant_border_value = 0);
+SimpleTensor<T> reorder_layer(const SimpleTensor<T> &src, const TensorShape &output_shape, WeightFormat output_wf);
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_REMAP_H */
+#endif /* ACL_TESTS_VALIDATION_REFERENCE_REORDER */
diff --git a/tests/validation/reference/ReshapeLayer.cpp b/tests/validation/reference/ReshapeLayer.cpp
index daea001be6..30a58dd65b 100644
--- a/tests/validation/reference/ReshapeLayer.cpp
+++ b/tests/validation/reference/ReshapeLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017,2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,14 +44,15 @@ SimpleTensor<T> reshape_layer(const SimpleTensor<T> &src, const TensorShape &out
return dst;
}
-template SimpleTensor<uint8_t> reshape_layer(const SimpleTensor<uint8_t> &src, const TensorShape &output_shape);
-template SimpleTensor<int8_t> reshape_layer(const SimpleTensor<int8_t> &src, const TensorShape &output_shape);
+template SimpleTensor<uint8_t> reshape_layer(const SimpleTensor<uint8_t> &src, const TensorShape &output_shape);
+template SimpleTensor<int8_t> reshape_layer(const SimpleTensor<int8_t> &src, const TensorShape &output_shape);
template SimpleTensor<uint16_t> reshape_layer(const SimpleTensor<uint16_t> &src, const TensorShape &output_shape);
-template SimpleTensor<int16_t> reshape_layer(const SimpleTensor<int16_t> &src, const TensorShape &output_shape);
+template SimpleTensor<int16_t> reshape_layer(const SimpleTensor<int16_t> &src, const TensorShape &output_shape);
template SimpleTensor<uint32_t> reshape_layer(const SimpleTensor<uint32_t> &src, const TensorShape &output_shape);
-template SimpleTensor<int32_t> reshape_layer(const SimpleTensor<int32_t> &src, const TensorShape &output_shape);
-template SimpleTensor<half> reshape_layer(const SimpleTensor<half> &src, const TensorShape &output_shape);
-template SimpleTensor<float> reshape_layer(const SimpleTensor<float> &src, const TensorShape &output_shape);
+template SimpleTensor<int32_t> reshape_layer(const SimpleTensor<int32_t> &src, const TensorShape &output_shape);
+template SimpleTensor<half> reshape_layer(const SimpleTensor<half> &src, const TensorShape &output_shape);
+template SimpleTensor<float> reshape_layer(const SimpleTensor<float> &src, const TensorShape &output_shape);
+template SimpleTensor<bfloat16> reshape_layer(const SimpleTensor<bfloat16> &src, const TensorShape &output_shape);
/** [ReshapeLayer] **/
} // namespace reference
} // namespace validation
diff --git a/tests/validation/reference/Reverse.cpp b/tests/validation/reference/Reverse.cpp
index c6c4614278..7924f900d1 100644
--- a/tests/validation/reference/Reverse.cpp
+++ b/tests/validation/reference/Reverse.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -35,8 +35,9 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis)
+SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis)
{
+ ARM_COMPUTE_ERROR_ON(src.shape().num_dimensions() > 4);
ARM_COMPUTE_ERROR_ON(axis.shape().num_dimensions() > 1);
ARM_COMPUTE_ERROR_ON(axis.shape().x() > 4);
@@ -48,10 +49,32 @@ SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t>
const unsigned int depth = src.shape()[2];
const unsigned int batches = src.shape()[3];
+ const int rank = src.shape().num_dimensions();
+
std::array<bool, 4> to_reverse = { { false, false, false, false } };
for(int i = 0; i < axis.num_elements(); ++i)
{
- to_reverse[axis[i]] = true;
+ int axis_i = axis[i];
+
+ // The values of the axis tensor must be within [-rank, rank-1].
+ if((axis_i < -rank) || (axis_i >= rank))
+ {
+ ARM_COMPUTE_ERROR("the values of the axis tensor must be within [-rank, rank-1].");
+ }
+
+ // In case of a negative axis value, i.e. targeted axis(i) = rank + axis(i)
+ if(axis_i < 0)
+ {
+ axis_i = rank + axis_i;
+ }
+
+ // Reverse ACL axis indices convention i.e. (inverted)axis = (tensor_rank - 1) - axis
+ if(use_inverted_axis)
+ {
+ axis_i = (rank - 1) - axis_i;
+ }
+
+ to_reverse[axis_i] = true;
}
const uint32_t num_elements = src.num_elements();
@@ -73,9 +96,9 @@ SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t>
return dst;
}
-template SimpleTensor<uint8_t> reverse(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint32_t> &axis);
-template SimpleTensor<half> reverse(const SimpleTensor<half> &src, const SimpleTensor<uint32_t> &axis);
-template SimpleTensor<float> reverse(const SimpleTensor<float> &src, const SimpleTensor<uint32_t> &axis);
+template SimpleTensor<uint8_t> reverse(const SimpleTensor<uint8_t> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis);
+template SimpleTensor<half> reverse(const SimpleTensor<half> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis);
+template SimpleTensor<float> reverse(const SimpleTensor<float> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis);
} // namespace reference
} // namespace validation
} // namespace test
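To illustrate the new axis handling on a hypothetical rank-4 input: an axis value of -1 is first wrapped to rank + (-1) = 3; if use_inverted_axis is set (i.e. the caller numbered axes from the outermost dimension), it is then flipped to (rank - 1) - 3 = 0, the innermost ACL dimension, before the corresponding to_reverse flag is set.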
diff --git a/tests/validation/reference/Reverse.h b/tests/validation/reference/Reverse.h
index 4a28da7270..30926b05a5 100644
--- a/tests/validation/reference/Reverse.h
+++ b/tests/validation/reference/Reverse.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_REVERSE_H
-#define ARM_COMPUTE_TEST_REVERSE_H
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H
+#define ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H
#include "tests/SimpleTensor.h"
@@ -35,9 +35,9 @@ namespace validation
namespace reference
{
template <typename T>
-SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<uint32_t> &axis);
+SimpleTensor<T> reverse(const SimpleTensor<T> &src, const SimpleTensor<int32_t> &axis, bool use_inverted_axis = false);
} // namespace reference
} // namespace validation
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_REVERSE_H */
+#endif // ACL_TESTS_VALIDATION_REFERENCE_REVERSE_H
diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp
index 71e98fd776..2f429cb29b 100644
--- a/tests/validation/reference/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,7 +25,6 @@
#include "Scale.h"
#include "Utils.h"
-#include "arm_compute/core/utils/misc/Utility.h"
#include "src/core/utils/ScaleUtils.h"
#include "support/Rounding.h"
@@ -183,14 +182,15 @@ SimpleTensor<T> scale_core(const SimpleTensor<T> &in, float scale_x, float scale
template <typename T>
SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners)
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info)
{
+ ARM_COMPUTE_UNUSED(output_quantization_info);
return scale_core<T>(src, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy, ceil_policy_scale, align_corners);
}
template <>
SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners)
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info)
{
SimpleTensor<uint8_t> dst;
if(src.quantization_info().uniform().scale != 0.f)
@@ -198,7 +198,7 @@ SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, flo
SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
float constant_border_value_f = dequantize_qasymm8(constant_border_value, src.quantization_info());
SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners);
- dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info());
+ dst = convert_to_asymmetric<uint8_t>(dst_tmp, output_quantization_info);
}
else
{
@@ -209,7 +209,7 @@ SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, flo
template <>
SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int8_t constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners)
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info)
{
SimpleTensor<int8_t> dst;
if(src.quantization_info().uniform().scale != 0.f)
@@ -217,7 +217,7 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float
SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
float constant_border_value_f = dequantize_qasymm8_signed(constant_border_value, src.quantization_info());
SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners);
- dst = convert_to_asymmetric<int8_t>(dst_tmp, src.quantization_info());
+ dst = convert_to_asymmetric<int8_t>(dst_tmp, output_quantization_info);
}
else
{
@@ -227,11 +227,11 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float
}
template SimpleTensor<int16_t> scale(const SimpleTensor<int16_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int16_t constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners);
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info);
template SimpleTensor<half> scale(const SimpleTensor<half> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, half constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners);
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info);
template SimpleTensor<float> scale(const SimpleTensor<float> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, float constant_border_value,
- SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners);
+ SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/Scale.h b/tests/validation/reference/Scale.h
index c66af8d94e..c32c07d1c0 100644
--- a/tests/validation/reference/Scale.h
+++ b/tests/validation/reference/Scale.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -37,7 +37,7 @@ namespace reference
{
template <typename T>
SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0,
- SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false);
+ SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false, QuantizationInfo output_quantization_info = QuantizationInfo());
} // namespace reference
} // namespace validation
} // namespace test
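The new output_quantization_info argument is easiest to see at a call site: for QASYMM8/QASYMM8_SIGNED inputs the reference now requantizes the scaled result with the supplied info instead of reusing the source quantization info, while for other types the argument is ignored. A hedged sketch (scales, offsets and shapes are made up):

    // Sketch only: upscale a QASYMM8 tensor, requantizing the output differently.
    // (Same includes/using declarations as the Reverse sketch above.)
    const QuantizationInfo in_qinfo(0.5f, 10);
    const QuantizationInfo out_qinfo(0.25f, 0);

    SimpleTensor<uint8_t> src{TensorShape(8U, 8U), DataType::QASYMM8, 1, in_qinfo};
    // ... fill src ...

    SimpleTensor<uint8_t> dst = reference::scale(src, 2.f, 2.f,
                                                 InterpolationPolicy::BILINEAR,
                                                 BorderMode::REPLICATE,
                                                 static_cast<uint8_t>(0), // constant_border_value
                                                 SamplingPolicy::CENTER,
                                                 false,                   // ceil_policy_scale
                                                 false,                   // align_corners
                                                 out_qinfo);              // new parameter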
diff --git a/tests/validation/reference/ScatterLayer.cpp b/tests/validation/reference/ScatterLayer.cpp
new file mode 100644
index 0000000000..55c48a9002
--- /dev/null
+++ b/tests/validation/reference/ScatterLayer.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ScatterLayer.h"
+#include "tests/validation/Helpers.h"
+#include "arm_compute/core/TensorShape.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+
+template <typename T>
+T reduce_op(const T &current, const T &update, const ScatterFunction func)
+{
+ switch(func)
+ {
+ case ScatterFunction::Update:
+ return update;
+ break;
+ case ScatterFunction::Add:
+ return current + update;
+ break;
+ case ScatterFunction::Sub:
+ return current - update;
+ break;
+ case ScatterFunction::Max:
+ return std::max(current, update);
+ break;
+ case ScatterFunction::Min:
+ return std::min(current, update);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported Scatter function");
+ break;
+ }
+}
+
+template float reduce_op(const float &current, const float &update, const ScatterFunction func);
+template half reduce_op(const half &current, const half &update, const ScatterFunction func);
+} // namespace
+
+// NOTE: This function expects collapsed tensors as input.
+// Batch dims for update/indices tensors should be collapsed into a single dim.
+// Data dims should be collapsed into a single dim for both update and src tensors prior to calling this function.
+template <typename T>
+SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info)
+{
+ // 1. If zero initialization variable is false, copy src data to dst.
+ SimpleTensor<T> dst{ out_shape, src.data_type(), 1 };
+ if(!info.zero_initialization)
+ {
+ std::copy_n(src.data(), src.num_elements(), dst.data());
+ }
+
+ // Number of elements between each value of the dim being iterated through
+ const unsigned int data_stride = updates.shape().total_size_lower(updates.shape().num_dimensions() - 1);
+ const unsigned int no_output_dims = out_shape.num_dimensions();
+
+ // Calculate output stride at given index for all output dims.
+ std::vector<unsigned int> out_stride_at_idx(no_output_dims);
+ for (unsigned int i = 0; i < no_output_dims; i++)
+ {
+ out_stride_at_idx[i] = out_shape.total_size_lower(i);
+ }
+
+ const unsigned int indices_x_dim = static_cast<unsigned int>(indices.shape()[0]);
+ const unsigned int indices_y_dim = static_cast<unsigned int>(indices.shape()[1]);
+
+ // 2. Iterate over indices tensor y-dim and replace sections of dst tensor with relevant areas of update tensor.
+ for(unsigned int i = 0; i < indices_y_dim; i++)
+ {
+ // NOTE: Currently, indices.shape() == [X, Y, 1, 1], where X is the indices dim and Y is the batch dim
+ // Starting index for both the update and indices tensors.
+ const unsigned int update_dim_start = i * data_stride;
+ const unsigned int indices_dim_start = i * indices_x_dim;
+ bool out_of_bounds = false;
+ unsigned int out_offset_acc = 0;
+
+ // Iterate over each indices value for the relevant batch and accumulate the offset.
+ for(unsigned int j = 0; j < indices_x_dim; j++)
+ {
+ // Get the first index value for this batch with i * indices_x_dim (iterating through the y/batch dim), then step through the x dim by adding j
+ const int index_value = indices[indices_dim_start + j];
+ const unsigned int out_dim = no_output_dims - (j+1); // Calculate corresponding output dim to current index value.
+ if(index_value < static_cast<int>(out_shape[out_dim]) && index_value >= 0)
+ {
+ out_offset_acc += (index_value * out_stride_at_idx[out_dim]); // offset accumulation
+ }
+ else
+ {
+ out_of_bounds = true;
+ break;
+ }
+ }
+
+ // If not out of bounds, copy update tensor elements to output
+ if(!out_of_bounds)
+ {
+ for (unsigned int j = 0; j < data_stride; j++)
+ {
+ dst[out_offset_acc + j] = reduce_op(dst[out_offset_acc + j], updates[update_dim_start + j], info.func);
+ }
+ }
+ }
+ return dst;
+}
+
+template <typename T>
+SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info)
+{
+ return scatter_layer_internal<T>(src, updates, indices, out_shape, info);
+}
+
+template SimpleTensor<float> scatter_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<half> scatter_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<int32_t> scatter_layer(const SimpleTensor<int32_t> &src, const SimpleTensor<int32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<uint32_t> scatter_layer(const SimpleTensor<uint32_t> &src, const SimpleTensor<uint32_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<int16_t> scatter_layer(const SimpleTensor<int16_t> &src, const SimpleTensor<int16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<uint16_t> scatter_layer(const SimpleTensor<uint16_t> &src, const SimpleTensor<uint16_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<int8_t> scatter_layer(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+template SimpleTensor<uint8_t> scatter_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
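To make the offset arithmetic above concrete, consider a small hand-worked case (values illustrative; ScatterInfo is assumed here to be constructible from a ScatterFunction plus a zero-initialization flag). With out_shape = [4, 3], updates of shape [4, 2] and indices of shape [1, 2]: data_stride is 4, out_stride_at_idx is {1, 4}, and each single-value indices row k maps to output offset k * 4, so update row i is combined into output row k via reduce_op.

    // Sketch only: scatter two 4-element update rows into rows 2 and 0 of a 4x3 output.
    // (Same includes/using declarations as the earlier sketches, plus ScatterLayer.h.)
    SimpleTensor<float>   src{TensorShape(4U, 3U), DataType::F32};      // already collapsed
    SimpleTensor<float>   updates{TensorShape(4U, 2U), DataType::F32};  // two update rows
    SimpleTensor<int32_t> indices{TensorShape(1U, 2U), DataType::S32};  // one index per row
    indices[0] = 2; // first update row  -> output offset 2 * 4
    indices[1] = 0; // second update row -> output offset 0 * 4

    const ScatterInfo info{ScatterFunction::Add, /* zero_initialization */ false}; // assumed ctor
    SimpleTensor<float> dst = reference::scatter_layer(src, updates, indices, src.shape(), info);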
diff --git a/tests/validation/reference/ScatterLayer.h b/tests/validation/reference/ScatterLayer.h
new file mode 100644
index 0000000000..97d5e70b0d
--- /dev/null
+++ b/tests/validation/reference/ScatterLayer.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H
+#define ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H
+
+#include "Utils.h"
+#include "arm_compute/function_info/ScatterInfo.h"
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info);
+
+template <typename T>
+SimpleTensor<T> scatter_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &update, const SimpleTensor<int32_t> &indices, const TensorShape &shape, const ScatterInfo &info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_REFERENCE_SCATTERLAYER_H
diff --git a/tests/validation/reference/UtilsQuantizedAsymm.h b/tests/validation/reference/UtilsQuantizedAsymm.h
index 1f593bb696..e5ecc66545 100644
--- a/tests/validation/reference/UtilsQuantizedAsymm.h
+++ b/tests/validation/reference/UtilsQuantizedAsymm.h
@@ -32,6 +32,22 @@ namespace test
{
namespace validation
{
+namespace
+{
+#if __clang__
+// This has been tested on clang 7.0.2 (__clang_major__ == 7 && __clang_minor__ == 0 && __clang_patchlevel__ == 2)
+inline int64_t to_int64(int32_t val)
+{
+ return static_cast<int64_t>(val) | ((val < 0) ? (((1ll << 32) - 1) << 32) : 0);
+}
+#else // __clang__
+inline int64_t to_int64(int32_t val)
+{
+ return static_cast<int64_t>(val);
+}
+#endif // __clang__
+} // namespace
+
/** Rounded to nearest division by a power-of-two. */
inline int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent)
{
@@ -43,12 +59,12 @@ inline int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent)
/** Multiplication of two integers. The same as ARMv7 Arm® Neon™ VQRDMULH instruction. */
inline int32_t asymm_int_mult(int32_t a, int32_t b)
{
- bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
- int64_t a_64(a);
- int64_t b_64(b);
- int64_t ab_64 = a_64 * b_64;
- int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
- int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
+ const bool overflow = a == b && a == std::numeric_limits<int32_t>::min();
+ const int64_t a_64 = to_int64(a);
+ const int64_t b_64 = to_int64(b);
+ const int64_t ab_64 = a_64 * b_64;
+ const int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
+ const int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
}
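As the VQRDMULH comment says, asymm_int_mult() is a saturating rounding doubling high multiply, i.e. round(a * b / 2^31), with the single overflowing input pair (both operands INT32_MIN) saturated to INT32_MAX. A small hedged self-check of that interpretation:

    // Sketch only: sanity-check asymm_int_mult() against its fixed-point meaning.
    #include <cassert>
    #include <cstdint>
    #include <limits>
    #include "tests/validation/reference/UtilsQuantizedAsymm.h"

    void check_asymm_int_mult()
    {
        using arm_compute::test::validation::asymm_int_mult;
        // 0.5 * 0.5 in Q0.31: (1 << 30) represents 0.5 and (1 << 29) represents 0.25.
        assert(asymm_int_mult(1 << 30, 1 << 30) == (1 << 29));
        // The only overflowing case saturates.
        const int32_t int_min = std::numeric_limits<int32_t>::min();
        assert(asymm_int_mult(int_min, int_min) == std::numeric_limits<int32_t>::max());
    }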