From 0e2123695083df5fc1a98af22bbb51808c413350 Mon Sep 17 00:00:00 2001
From: Mohammed Suhail Munshi
Date: Mon, 8 Apr 2024 14:38:31 +0100
Subject: Multi-Dimensional and Batched Scatter Reference and Dataset Implementation.

Resolves: [COMPMID-6893, COMPMID-6895, COMPMID-6898]
Change-Id: I355f46aeba2213cd8d067cac7643d8d96e713c93
Signed-off-by: Mohammed Suhail Munshi
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11430
Reviewed-by: Gunes Bayir
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 tests/validation/reference/ScatterLayer.cpp | 68 +++++++++++++++++++++--------
 1 file changed, 50 insertions(+), 18 deletions(-)

diff --git a/tests/validation/reference/ScatterLayer.cpp b/tests/validation/reference/ScatterLayer.cpp
index 7543b46bb1..283022e8e2 100644
--- a/tests/validation/reference/ScatterLayer.cpp
+++ b/tests/validation/reference/ScatterLayer.cpp
@@ -23,6 +23,7 @@
  */
 #include "ScatterLayer.h"
 #include "tests/validation/Helpers.h"
+#include "arm_compute/core/TensorShape.h"
 
 namespace arm_compute
 {
@@ -64,36 +65,67 @@ T reduce_op(const T &current,const T &update,const ScatterFunction func)
 template float reduce_op(const float &current,const float &update,const ScatterFunction func);
 }
 
-// Note : This function currently only supports 1D src, 1D updates, 2D indices, 1D output tensors.
+// NOTE: This function expects collapsed tensors as input.
+// Batch dims for update/indices tensors should be collapsed into a single dim.
+// Data dims should be collapsed into a single dim for both update and src tensors prior to calling this function.
 template <typename T>
 SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info)
 {
+    // 1. If zero initialization variable is false, copy src data to dst.
     SimpleTensor<T> dst{ out_shape, src.data_type(), 1 };
-
-    // 1. If zero initialization variable is true, fill dst with 0 values. Else copy src data to dst.
-    if(info.zero_initialization)
-    {
-        for (int i = 0; i < src.num_elements(); ++i)
-        {
-            dst[i] = static_cast<T>(0);
-        }
-    }
-    else
+    if(!info.zero_initialization)
     {
         std::copy_n(src.data(), src.num_elements(), dst.data());
     }
 
-    // 2. Get max index of output tensor, then iterate over index tensor.
-    const int x_bound = static_cast<int>(dst.shape().x());
+    // Number of elements between each value of the dim being iterated through
+    const unsigned int data_stride = updates.shape().total_size_lower(updates.shape().num_dimensions() - 1);
+    const unsigned int no_output_dims = out_shape.num_dimensions();
+
+    // Calculate output stride at given index for all output dims.
+    std::vector<unsigned int> out_stride_at_idx(no_output_dims);
+    for (unsigned int i = 0 ; i < no_output_dims; i++)
+    {
+        out_stride_at_idx[i] = out_shape.total_size_lower(i);
+    }
+    const unsigned int indices_x_dim = static_cast<unsigned int>(indices.shape()[0]);
+    const unsigned int indices_y_dim = static_cast<unsigned int>(indices.shape()[1]);
 
-    for(int i = 0; i < indices.num_elements(); ++i)
+    // 2. Iterate over indices tensor y-dim and replace sections of dst tensor with relevant areas of update tensor.
+    for(unsigned int i = 0; i < indices_y_dim; i++)
     {
-        // 3. Check whether index is out of bounds for dst, if not then apply reduce op.
-        const auto index = indices[i];
-        if (index < x_bound && index >= 0) // Note : we ignore negative index values.
+        // NOTE : Currently, indices.shape() == [X, Y, 1, 1], where X is the indices dim and Y is the batch dim
+        // Starting index for both the update and indices tensors.
+        const unsigned int update_dim_start = i * data_stride;
+        const unsigned int indices_dim_start = i * indices_x_dim;
+        bool out_of_bounds = false;
+        unsigned int out_offset_acc = 0;
+
+        // Iterate over each indices value for the relevant batch and accumulate the offset.
+        for(unsigned int j = 0; j < indices_x_dim; j++)
+        {
+            // Get first index value with i * indices_x_dim (iterating through y-dim/batch idx), then iterate through x dim by adding j
+            const int index_value = indices[indices_dim_start + j];
+            const unsigned int out_dim = no_output_dims - (j+1); // Calculate corresponding output dim to current index value.
+            if(index_value < static_cast<int>(out_shape[out_dim]) && index_value >= 0)
+            {
+                out_offset_acc += (index_value * out_stride_at_idx[out_dim]); // offset accumulation
+            }
+            else
+            {
+                out_of_bounds = true;
+                break;
+            }
+        }
+
+        // If not out of bounds, copy update tensor elements to output
+        if(!out_of_bounds)
         {
-            dst[index] = reduce_op(dst[index], updates[i], info.func);
+            for (unsigned int j = 0 ; j < data_stride; j++)
+            {
+                dst[out_offset_acc + j] = reduce_op(dst[out_offset_acc + j], updates[update_dim_start + j], info.func);
+            }
         }
     }
     return dst;
-- 
cgit v1.2.1
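
For readers unfamiliar with the stride arithmetic in this patch, the following standalone C++ sketch mirrors the offset accumulation performed by the new reference. It is illustrative only: it does not use ComputeLibrary types, and every shape, name and value in it is hypothetical (a [4, 3] collapsed output, two batch entries, one index component per entry, plain overwrite as the reduction) rather than anything taken from the library's test suite.

// Standalone illustration (not ComputeLibrary code) of the stride-based offset
// accumulation used by scatter_layer_internal above. All shapes and values are
// hypothetical: output [W=4, H=3], two batch entries, one index component each,
// plain overwrite as the reduction.
#include <iostream>
#include <vector>

int main()
{
    // Collapsed output shape, lowest (fastest-moving) dim first.
    const std::vector<unsigned int> out_shape = {4, 3};
    const unsigned int no_output_dims = static_cast<unsigned int>(out_shape.size());

    std::vector<float> dst(4 * 3, 0.0f);                          // zero-initialised output
    const std::vector<float> updates = {1, 1, 1, 1, 2, 2, 2, 2};  // two collapsed rows of update data
    const std::vector<int>   indices = {2, 0};                    // one index component per batch entry

    const unsigned int indices_x_dim = 1; // index components per batch entry
    const unsigned int indices_y_dim = 2; // batch entries
    const unsigned int data_stride   = 4; // elements written per batch entry (one W row here)

    // Equivalent of out_shape.total_size_lower(i): product of all dims below dim i.
    std::vector<unsigned int> out_stride_at_idx(no_output_dims, 1);
    for (unsigned int i = 1; i < no_output_dims; ++i)
    {
        out_stride_at_idx[i] = out_stride_at_idx[i - 1] * out_shape[i - 1];
    }

    for (unsigned int i = 0; i < indices_y_dim; ++i)
    {
        unsigned int out_offset_acc = 0;
        bool         out_of_bounds  = false;

        // Accumulate a flat output offset; component j addresses output dim
        // (no_output_dims - (j + 1)), exactly as in the reference above.
        for (unsigned int j = 0; j < indices_x_dim; ++j)
        {
            const int          index_value = indices[i * indices_x_dim + j];
            const unsigned int out_dim     = no_output_dims - (j + 1);
            if (index_value >= 0 && index_value < static_cast<int>(out_shape[out_dim]))
            {
                out_offset_acc += static_cast<unsigned int>(index_value) * out_stride_at_idx[out_dim];
            }
            else
            {
                out_of_bounds = true; // an invalid index skips this whole batch entry
                break;
            }
        }

        if (!out_of_bounds)
        {
            for (unsigned int j = 0; j < data_stride; ++j)
            {
                dst[out_offset_acc + j] = updates[i * data_stride + j]; // overwrite ("update") reduction
            }
        }
    }

    // Prints: 2 2 2 2 / 0 0 0 0 / 1 1 1 1
    for (unsigned int y = 0; y < out_shape[1]; ++y)
    {
        for (unsigned int x = 0; x < out_shape[0]; ++x)
        {
            std::cout << dst[y * out_shape[0] + x] << ' ';
        }
        std::cout << '\n';
    }
    return 0;
}

Running the sketch writes the first update row at output row 2 and the second at row 0, leaving row 1 untouched, which matches the bounds-checked, stride-based copy performed per batch entry in scatter_layer_internal.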