From 0e2123695083df5fc1a98af22bbb51808c413350 Mon Sep 17 00:00:00 2001
From: Mohammed Suhail Munshi
Date: Mon, 8 Apr 2024 14:38:31 +0100
Subject: Multi-Dimensional and Batched Scatter Reference and Dataset Implementation.

Resolves: [COMPMID-6893, COMPMID-6895, COMPMID-6898]

Change-Id: I355f46aeba2213cd8d067cac7643d8d96e713c93
Signed-off-by: Mohammed Suhail Munshi
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11430
Reviewed-by: Gunes Bayir
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 tests/datasets/ScatterDataset.h                 | 63 +++++++++++++++++++++++
 tests/validation/CL/ScatterLayer.cpp            |  1 +
 tests/validation/fixtures/ScatterLayerFixture.h | 18 ++++++-
 tests/validation/reference/ScatterLayer.cpp     | 68 ++++++++++++++++++-------
 4 files changed, 131 insertions(+), 19 deletions(-)

diff --git a/tests/datasets/ScatterDataset.h b/tests/datasets/ScatterDataset.h
index f7547ecc94..8b0972f99a 100644
--- a/tests/datasets/ScatterDataset.h
+++ b/tests/datasets/ScatterDataset.h
@@ -113,6 +113,8 @@ private:
     std::vector<TensorShape> _dst_shapes{};
 };
 
+
+// 1D dataset for simple scatter tests.
 class Small1DScatterDataset final : public ScatterDataset
 {
 public:
@@ -122,6 +124,67 @@ public:
         add_config(TensorShape(10U), TensorShape(2U), TensorShape(1U, 2U), TensorShape(10U));
     }
 };
+
+// This dataset represents the (m+1)-D updates/dst case.
+class SmallScatterMultiDimDataset final : public ScatterDataset
+{
+public:
+    SmallScatterMultiDimDataset()
+    {
+        // NOTE: Config order is src, updates, indices, output.
+        // - In these configs, the dim being replaced is the last (largest) tensor dimension.
+        // - The outermost "updates" dim should match the y-dim of indices.
+        // - src/updates/dst should all have the same number of dims. Indices should be 2D.
+        add_config(TensorShape(6U, 5U), TensorShape(6U, 2U), TensorShape(1U, 2U), TensorShape(6U, 5U));
+        add_config(TensorShape(9U, 3U, 4U), TensorShape(9U, 3U, 2U), TensorShape(1U, 2U), TensorShape(9U, 3U, 4U));
+        add_config(TensorShape(3U, 2U, 4U, 2U), TensorShape(3U, 2U, 4U, 2U), TensorShape(1U, 2U), TensorShape(3U, 2U, 4U, 2U));
+    }
+};
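For intuition, the first SmallScatterMultiDimDataset config (src [6, 5], updates [6, 2], indices [1, 2]) behaves as in the following standalone sketch. This is illustrative only, not part of the patch: it zero-initialises dst and uses made-up index values {4, 1}.

// Minimal sketch (not library code) of the (m+1)-D scatter semantics above:
// each index value selects one dst row to overwrite with an updates row.
#include <cstdio>
#include <vector>

int main()
{
    const int row_len = 6;                        // x-dim shared by src/updates/dst
    std::vector<float> dst(row_len * 5, 0.f);     // dst = [6, 5], zero-initialised here
    std::vector<float> updates(row_len * 2, 1.f); // updates = [6, 2]: two rows of ones
    std::vector<int>   indices = {4, 1};          // indices = [1, 2]: one index per updates row (illustrative values)

    // Each indices value picks the dst row replaced by the matching updates row.
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int row = indices[i];
        if (row < 0 || row >= 5) continue;        // out-of-bounds indices are skipped
        for (int j = 0; j < row_len; ++j)
            dst[row * row_len + j] = updates[i * row_len + j];
    }

    // Rows 1 and 4 now contain ones; the rest remain zero.
    for (int y = 0; y < 5; ++y, std::putchar('\n'))
        for (int x = 0; x < row_len; ++x)
            std::printf("%.0f ", dst[y * row_len + x]);
    return 0;
}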
+
+// This dataset represents the (m+1)-D updates tensor, (m+n)-D output tensor cases.
+class SmallScatterMultiIndicesDataset final : public ScatterDataset
+{
+public:
+    SmallScatterMultiIndicesDataset()
+    {
+        // NOTE: Config order is src, updates, indices, output.
+        // NOTE: indices.shape.x = src.num_dimensions - updates.num_dimensions + 1
+
+        // index length is 2
+        add_config(TensorShape(6U, 5U, 2U), TensorShape(6U, 4U), TensorShape(2U, 4U), TensorShape(6U, 5U, 2U));
+        add_config(TensorShape(17U, 3U, 3U, 2U), TensorShape(17U, 3U, 2U), TensorShape(2U, 2U), TensorShape(17U, 3U, 3U, 2U));
+        add_config(TensorShape(11U, 3U, 3U, 2U, 4U), TensorShape(11U, 3U, 3U, 4U), TensorShape(2U, 4U), TensorShape(11U, 3U, 3U, 2U, 4U));
+        add_config(TensorShape(5U, 4U, 3U, 3U, 2U, 4U), TensorShape(5U, 4U, 3U, 3U, 5U), TensorShape(2U, 5U), TensorShape(5U, 4U, 3U, 3U, 2U, 4U));
+
+        // index length is 3
+        add_config(TensorShape(4U, 3U, 2U, 2U), TensorShape(4U, 2U), TensorShape(3U, 2U), TensorShape(4U, 3U, 2U, 2U));
+        add_config(TensorShape(17U, 4U, 3U, 2U, 2U), TensorShape(17U, 4U, 4U), TensorShape(3U, 4U), TensorShape(17U, 4U, 3U, 2U, 2U));
+        add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 4U, 5U, 3U), TensorShape(3U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U));
+
+        // index length is 4
+        add_config(TensorShape(35U, 4U, 3U, 2U, 2U), TensorShape(35U, 4U), TensorShape(4U, 4U), TensorShape(35U, 4U, 3U, 2U, 2U));
+        add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 4U, 3U), TensorShape(4U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U));
+
+        // index length is 5
+        add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 3U), TensorShape(5U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U));
+    }
+};
+
+// This dataset represents the (m+k)-D updates tensor, (k+1)-D indices tensor and (m+n)-D output tensor cases.
+class SmallScatterBatchedDataset final : public ScatterDataset
+{
+public:
+    SmallScatterBatchedDataset()
+    {
+        // NOTE: Config order is src, updates, indices, output.
+        // NOTE: Updates/Indices tensors are now batched.
+        // NOTE: indices.shape.x = (updates_batched) ? (src.num_dimensions - updates.num_dimensions) + 2 : (src.num_dimensions - updates.num_dimensions) + 1
+        add_config(TensorShape(6U, 5U), TensorShape(6U, 2U, 2U), TensorShape(1U, 2U, 2U), TensorShape(6U, 5U));
+        add_config(TensorShape(6U, 5U, 2U), TensorShape(6U, 2U, 2U), TensorShape(2U, 2U, 2U), TensorShape(6U, 5U, 2U));
+        add_config(TensorShape(6U, 5U, 2U, 2U), TensorShape(3U, 2U), TensorShape(4U, 3U, 2U), TensorShape(6U, 5U, 2U, 2U));
+        add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(6U, 2U), TensorShape(5U, 6U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U));
+    }
+};
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute

diff --git a/tests/validation/CL/ScatterLayer.cpp b/tests/validation/CL/ScatterLayer.cpp
index 9711671841..5b1d5afe92 100644
--- a/tests/validation/CL/ScatterLayer.cpp
+++ b/tests/validation/CL/ScatterLayer.cpp
@@ -111,6 +111,7 @@ FIXTURE_DATA_TEST_CASE(RunSmallZeroInit, CLScatterLayerFixture<float>, framework
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
+
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 TEST_SUITE_END() // Scatter
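The indices.shape.x relationships noted in the two datasets above can be sanity-checked with a small hypothetical helper. expected_index_length is not an ACL function; this is a sketch of the arithmetic only.

// Sketch (not part of the patch) of the index-length rule documented above:
// indices.shape.x = src.num_dims - updates.num_dims + (updates batched ? 2 : 1)
constexpr unsigned int expected_index_length(unsigned int src_dims,
                                             unsigned int updates_dims,
                                             bool updates_batched)
{
    return src_dims - updates_dims + (updates_batched ? 2u : 1u);
}

int main()
{
    // SmallScatterMultiIndicesDataset: src [6,5,2] (3D), updates [6,4] (2D) -> index length 2.
    static_assert(expected_index_length(3, 2, false) == 2, "non-batched case");
    // SmallScatterBatchedDataset: src [6,5,2,2] (4D), updates [3,2] (2D, batched) -> index length 4.
    static_assert(expected_index_length(4, 2, true) == 4, "batched case");
    return 0;
}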
diff --git a/tests/validation/fixtures/ScatterLayerFixture.h b/tests/validation/fixtures/ScatterLayerFixture.h
index 451a1e1416..91e28b58f7 100644
--- a/tests/validation/fixtures/ScatterLayerFixture.h
+++ b/tests/validation/fixtures/ScatterLayerFixture.h
@@ -54,7 +54,7 @@ public:
 
 protected:
     template <typename U>
-    void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f)
+    void fill(U &&tensor, int i, float lo = -10.f, float hi = 10.f)
     {
         switch(tensor.data_type())
         {
@@ -135,6 +135,22 @@ protected:
     {
         // Output Quantization not currently in use - fixture should be extended to support this.
         ARM_COMPUTE_UNUSED(o_qinfo);
+        TensorShape src_shape     = a_shape;
+        TensorShape updates_shape = b_shape;
+        TensorShape indices_shape = c_shape;
+
+        // 1. Collapse the batch index into a single dim if necessary for the updates and indices tensors.
+        if(c_shape.num_dimensions() >= 3)
+        {
+            indices_shape = indices_shape.collapsed_from(1);
+            updates_shape = updates_shape.collapsed_from(updates_shape.num_dimensions() - 2); // Collapses the last 2 dims
+        }
+
+        // 2. Collapse data dims into a single dim.
+        // Collapse all src dims into 2 dims: the first holding data, the other being the index we iterate over.
+        src_shape.collapse(updates_shape.num_dimensions() - 1);     // Collapse all data dims into a single dim
+        src_shape = src_shape.collapsed_from(1);                    // Collapse all index dims into a single dim
+        updates_shape.collapse(updates_shape.num_dimensions() - 1); // Collapse data dims (all except the last dim, which is the batch dim)
 
         // Create reference tensors
         SimpleTensor<T> src{ a_shape, data_type, 1, a_qinfo };
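To make the collapse step above concrete, here is a rough standalone illustration of what happens to the last batched config (indices [5, 6, 2], updates [6, 2]) using plain vectors in place of TensorShape. The collapse helper restates the rules from the comments above and is not the ACL API.

// Rough illustration (not library code) of the shape collapsing done in the fixture.
// Shapes are written innermost-first, as in the dataset configs.
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

// Collapse dims [from, to) into a single dimension (product of their extents).
static std::vector<unsigned int> collapse(std::vector<unsigned int> shape, size_t from, size_t to)
{
    const unsigned int merged = std::accumulate(shape.begin() + from, shape.begin() + to,
                                                1u, std::multiplies<unsigned int>());
    shape.erase(shape.begin() + from, shape.begin() + to);
    shape.insert(shape.begin() + from, merged);
    return shape;
}

int main()
{
    // Batched config: indices [5, 6, 2] (3D, so batched), updates [6, 2].
    std::vector<unsigned int> indices = {5, 6, 2};
    std::vector<unsigned int> updates = {6, 2};

    // Step 1 of the fixture: collapse batch dims,
    // indices [5, 6, 2] -> [5, 12]; updates [6, 2] -> [12].
    indices = collapse(indices, 1, indices.size());
    updates = collapse(updates, 0, updates.size());

    std::printf("indices: [%u, %u], updates: [%u]\n", indices[0], indices[1], updates[0]);
    return 0;
}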
diff --git a/tests/validation/reference/ScatterLayer.cpp b/tests/validation/reference/ScatterLayer.cpp
index 7543b46bb1..283022e8e2 100644
--- a/tests/validation/reference/ScatterLayer.cpp
+++ b/tests/validation/reference/ScatterLayer.cpp
@@ -23,6 +23,7 @@
  */
 #include "ScatterLayer.h"
 #include "tests/validation/Helpers.h"
+#include "arm_compute/core/TensorShape.h"
 
 namespace arm_compute
 {
@@ -64,36 +65,67 @@ T reduce_op(const T &current,const T &update,const ScatterFunction func)
 template float reduce_op(const float &current,const float &update,const ScatterFunction func);
 }
 
-// Note : This function currently only supports 1D src, 1D updates, 2D indices, 1D output tensors.
+// NOTE: This function expects collapsed tensors as input.
+// Batch dims for the updates/indices tensors should be collapsed into a single dim.
+// Data dims should be collapsed into a single dim for both the updates and src tensors prior to calling this function.
 template <typename T>
 SimpleTensor<T> scatter_layer_internal(const SimpleTensor<T> &src, const SimpleTensor<T> &updates, const SimpleTensor<int32_t> &indices, const TensorShape &out_shape, const ScatterInfo &info)
 {
+    // 1. If the zero initialization flag is false, copy src data to dst.
     SimpleTensor<T> dst{ out_shape, src.data_type(), 1 };
-
-    // 1. If zero initialization variable is true, fill dst with 0 values. Else copy src data to dst.
-    if(info.zero_initialization)
-    {
-        for (int i = 0; i < src.num_elements(); ++i)
-        {
-            dst[i] = static_cast<T>(0);
-        }
-    }
-    else
+    if(!info.zero_initialization)
     {
         std::copy_n(src.data(), src.num_elements(), dst.data());
     }
 
-    // 2. Get max index of output tensor, then iterate over index tensor.
-    const int x_bound = static_cast<int>(dst.shape().x());
+    // Number of elements between consecutive values of the dim being iterated through.
+    const unsigned int data_stride    = updates.shape().total_size_lower(updates.shape().num_dimensions() - 1);
+    const unsigned int no_output_dims = out_shape.num_dimensions();
+
+    // Calculate the output stride at a given index for all output dims.
+    std::vector<unsigned int> out_stride_at_idx(no_output_dims);
+    for (unsigned int i = 0; i < no_output_dims; i++)
+    {
+        out_stride_at_idx[i] = out_shape.total_size_lower(i);
+    }
+
+    const unsigned int indices_x_dim = static_cast<unsigned int>(indices.shape()[0]);
+    const unsigned int indices_y_dim = static_cast<unsigned int>(indices.shape()[1]);
 
-    for(int i = 0; i < indices.num_elements(); ++i)
+    // 2. Iterate over the indices tensor y-dim and replace sections of the dst tensor with the relevant areas of the updates tensor.
+    for(unsigned int i = 0; i < indices_y_dim; i++)
     {
-        // 3. Check whether index is out of bounds for dst, if not then apply reduce op.
-        const auto index = indices[i];
-        if (index < x_bound && index >= 0) // Note : we ignore negative index values.
+        // NOTE: Currently, indices.shape() == [X, Y, 1, 1], where X is the indices dim and Y is the batch dim.
+        // Starting indices for both the updates and indices tensors.
+        const unsigned int update_dim_start  = i * data_stride;
+        const unsigned int indices_dim_start = i * indices_x_dim;
+        bool         out_of_bounds  = false;
+        unsigned int out_offset_acc = 0;
+
+        // Iterate over each indices value for the relevant batch and accumulate the offset.
+        for(unsigned int j = 0; j < indices_x_dim; j++)
+        {
+            // Get the first index value with i * indices_x_dim (iterating through the y-dim/batch idx), then iterate through the x-dim by adding j.
+            const int index_value = indices[indices_dim_start + j];
+            const unsigned int out_dim = no_output_dims - (j + 1); // Calculate the output dim corresponding to the current index value.
+            if(index_value < static_cast<int>(out_shape[out_dim]) && index_value >= 0)
+            {
+                out_offset_acc += (index_value * out_stride_at_idx[out_dim]); // Offset accumulation
+            }
+            else
+            {
+                out_of_bounds = true;
+                break;
+            }
+        }
+
+        // If not out of bounds, copy the updates tensor elements to the output.
+        if(!out_of_bounds)
         {
-            dst[index] = reduce_op(dst[index], updates[i], info.func);
+            for (unsigned int j = 0; j < data_stride; j++)
+            {
+                dst[out_offset_acc + j] = reduce_op(dst[out_offset_acc + j], updates[update_dim_start + j], info.func);
+            }
         }
     }
     return dst;
--
cgit v1.2.1
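As a worked example of the offset accumulation in scatter_layer_internal, the following standalone trace (illustrative shapes and index values, not library code) computes where one batch of updates lands for out_shape = [6, 5, 2].

// Standalone trace of the offset accumulation for out_shape = [6, 5, 2]
// (innermost first) and one batch of index values {1, 3}.
#include <cstdio>

int main()
{
    const unsigned int out_shape[]    = {6, 5, 2}; // x, y, z extents
    const unsigned int no_output_dims = 3;

    // out_stride_at_idx[i] == total_size_lower(i): elements per step of dim i.
    unsigned int out_stride_at_idx[3];
    unsigned int acc = 1;
    for (unsigned int i = 0; i < no_output_dims; ++i)
    {
        out_stride_at_idx[i] = acc;
        acc *= out_shape[i];
    }

    // indices_x_dim == 2: the two values address dims z then y (outermost first).
    const int    index_values[] = {1, 3};
    unsigned int out_offset_acc = 0;
    for (unsigned int j = 0; j < 2; ++j)
    {
        const unsigned int out_dim = no_output_dims - (j + 1);
        out_offset_acc += index_values[j] * out_stride_at_idx[out_dim];
    }

    // 1*30 + 3*6 = 48: a data_stride of 6 elements lands at dst[48..53].
    std::printf("out_offset_acc = %u\n", out_offset_acc);
    return 0;
}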