From 3b162e53267d13d18891baf3372f971f1d4213d3 Mon Sep 17 00:00:00 2001 From: David Svantesson Date: Tue, 28 Mar 2023 14:13:32 +0000 Subject: Reorder added Adds Reorder kernel exposing blocking reorders from arm_gemm Resolves ONCPUML-1232 Change-Id: I42bf4166311fe1771565134d3ed7039fc8e30230 Signed-off-by: David Svantesson Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9500 Comments-Addressed: Arm Jenkins Reviewed-by: SiCong Li Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- src/core/NEON/kernels/NEReorderKernel.cpp | 223 ++++++++++++++++++++++++++++++ src/core/NEON/kernels/NEReorderKernel.h | 93 +++++++++++++ 2 files changed, 316 insertions(+) create mode 100644 src/core/NEON/kernels/NEReorderKernel.cpp create mode 100644 src/core/NEON/kernels/NEReorderKernel.h (limited to 'src/core/NEON/kernels') diff --git a/src/core/NEON/kernels/NEReorderKernel.cpp b/src/core/NEON/kernels/NEReorderKernel.cpp new file mode 100644 index 0000000000..c09745604f --- /dev/null +++ b/src/core/NEON/kernels/NEReorderKernel.cpp @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/core/NEON/kernels/NEReorderKernel.h"
+#include "src/common/utils/Log.h"
+#include "src/core/NEON/kernels/arm_gemm/transform.hpp"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
+
+namespace arm_compute
+{
+
+/** Reorder the blocks of rows selected by @p window into the blocked output tensor.
+ *
+ * Each step of the window's X dimension stands for one block of _ksize rows
+ * (the window is built that way in configure()).
+ *
+ * @param[in] window Sub-window of the kernel window to execute.
+ * @param[in] info   Thread information (unused).
+ */
+void NEReorderKernel::run(const Window &window, const ThreadInfo &info)
+{
+    ARM_COMPUTE_UNUSED(info);
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+    switch(_input->info()->data_type())
+    {
+        case DataType::F32:
+        {
+            // Number of elements in one block of _ksize rows
+            const int ksize_rows_elements = _xmax * _ksize;
+            // Element offset into the output buffer of the first block handled by this window
+            const int jump_rows           = ksize_rows_elements * window.x().start();
+            const int k_start             = window.x().start() * _ksize;
+            // Clamped to _kmax: configure() rounds the window up when _kmax is not a multiple of _ksize
+            const int k_end               = std::min(window.x().end() * _ksize, _kmax);
+            const int stride              = _kmax;
+            if(k_start < k_end)
+            {
+
+                switch(_output_wf)
+                {
+                    case WeightFormat::OHWIo4:
+                    {
+                        // F32 data: the byte buffers are viewed as float
+                        arm_gemm::Transform<4, 1, true, arm_gemm::VLType::None>(reinterpret_cast<float *>(_output->buffer()) + jump_rows, reinterpret_cast<float *>(_input->buffer()), stride, k_start, k_end, 0, _xmax);
+                        break;
+                    }
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+                    case WeightFormat::OHWIo8:
+                    {
+                        // F32 data: the byte buffers are viewed as float
+                        arm_gemm::Transform<1, 1, true, arm_gemm::VLType::SVE>(reinterpret_cast<float *>(_output->buffer()) + jump_rows, reinterpret_cast<float *>(_input->buffer()), stride, k_start, k_end, 0, _xmax);
+                        break;
+                    }
+#endif /* ARM_COMPUTE_ENABLE_SVE */
+                    default:
+                    {
+                        // This switch is over the output weight format, not the data type
+                        ARM_COMPUTE_ERROR("Unsupported weight format.");
+                        break;
+                    }
+                }
+            }
+            break;
+        }
+        default:
+            ARM_COMPUTE_ERROR("Unsupported data type!");
+    }
+}
+
+/** Default constructor: every member starts unconfigured and is set by configure(). */
+NEReorderKernel::NEReorderKernel()
+    : _input(nullptr), _output(nullptr), _ksize(0), _kmax(0), _xmax(0), _input_wf(WeightFormat::ANY), _output_wf(WeightFormat::ANY)
+{
+}
+
+/** Store the tensors and weight formats, derive the transform sizes and set up the kernel window.
+ *
+ * The arguments are first checked with validate(); an invalid configuration raises an error.
+ *
+ * @param[in]  input     Source tensor with 2 or 4 dimensions.
+ * @param[out] output    Destination tensor.
+ * @param[in]  input_wf  WeightFormat of input.
+ * @param[in]  output_wf WeightFormat of output (OHWIo4, or OHWIo8 when ARM_COMPUTE_ENABLE_SVE is defined).
+ */
+void NEReorderKernel::configure(const ITensor *input, ITensor *output, arm_compute::WeightFormat input_wf, arm_compute::WeightFormat output_wf)
+{
+    ARM_COMPUTE_LOG_PARAMS(input, output, input_wf, output_wf);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), input_wf, output_wf));
+
+    // Set variables
+    _input     = input;
+    _output    = output;
+    _input_wf  = input_wf;
+    _output_wf = output_wf;
+
+    // Setting parameters for transform
+    auto dims = input->info()->num_dimensions();
+    switch(dims)
+    {
+        case 2:
+        {
+            _xmax = input->info()->dimension(0); // Number of columns in input matrix
+            _kmax = input->info()->dimension(1); // Number of rows in input matrix
+            break;
+        }
+        case 4:
+        {
+            _xmax = input->info()->dimension(2); // Number of columns in input matrix
+            _kmax = input->info()->dimension(3); // Number of rows in input matrix
+            break;
+        }
+        default:
+        {
+            ARM_COMPUTE_ERROR("Only 2 or 4 dimensions supported.");
+        }
+    }
+
+    // Configure kernel window
+    // Window size is set by rows / _ksize
+    Window win;
+    int    window_size = 0;
+    switch(_output_wf)
+    {
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+        case WeightFormat::OHWIo8:
+        {
+            _ksize      = 8;
+            window_size = _kmax / _ksize;
+            break;
+        }
+#endif /* ARM_COMPUTE_ENABLE_SVE */
+        case WeightFormat::OHWIo4:
+        {
+            _ksize      = 4;
+            window_size = _kmax / _ksize;
+            break;
+        }
+        default:
+        {
+            ARM_COMPUTE_ERROR("Unsupported weight format.");
+            break;
+        }
+    }
+    // A trailing, partially filled block of rows still needs its own window step
+    if(_kmax % _ksize != 0)
+    {
+        window_size += 1;
+    }
+
+    win.set(Window::DimX, Window::Dimension(0, window_size, 1));
+
+    INEKernel::configure(win);
+}
+
+/** Check whether the given tensor infos and weight formats form a configuration the kernel can run.
+ *
+ * The shape and format checks are only performed when @p output already has a non-empty shape.
+ *
+ * @param[in] input     Source tensor info. Data types supported: F32.
+ * @param[in] output    Destination tensor info.
+ * @param[in] input_wf  WeightFormat of input. Only OHWI is supported.
+ * @param[in] output_wf WeightFormat of output (OHWIo4, or OHWIo8 when ARM_COMPUTE_ENABLE_SVE is defined).
+ *
+ * @return a status
+ */
+Status NEReorderKernel::validate(const ITensorInfo *input, const ITensorInfo *output, arm_compute::WeightFormat input_wf, arm_compute::WeightFormat output_wf)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
+    // run() only implements the F32 path, so reject every other data type here
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32);
+    if(output->tensor_shape().total_size() != 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
+        // Only input WeightFormat OHWI supported
+        ARM_COMPUTE_RETURN_ERROR_ON(input_wf != arm_compute::WeightFormat::OHWI);
+        int  input_x_dim;
+        int  input_k_dim;
+        int  output_x_dim;
+        int  output_k_dim;
+        // NOTE(review): configure() switches on the input's dimension count, this switch uses the output's - confirm they always agree
+        auto dims = output->num_dimensions();
+        switch(dims)
+        {
+            case 2:
+            {
+                input_x_dim  = input->dimension(0);  // Number of columns in input matrix
+                input_k_dim  = input->dimension(1);  // Number of rows in input matrix
+                output_x_dim = output->dimension(0); // Number of columns in output matrix
+                output_k_dim = output->dimension(1); // Number of rows in output matrix
+                break;
+            }
+            case 4:
+            {
+                input_x_dim  = input->dimension(2);  // Number of columns in input matrix
+                input_k_dim  = input->dimension(3);  // Number of rows in input matrix
+                output_x_dim = output->dimension(2); // Number of columns in output matrix
+                output_k_dim = output->dimension(3); // Number of rows in output matrix
+                break;
+            }
+            default:
+            {
+                ARM_COMPUTE_RETURN_ERROR_MSG("Only 2 or 4 dimensions supported.");
+            }
+        }
+
+        int ksize;
+        switch(output_wf)
+        {
+            // OHWIo8 is only handled by configure()/run() in SVE builds, so only accept it there
+#if defined(ARM_COMPUTE_ENABLE_SVE)
+            case WeightFormat::OHWIo8:
+            {
+                ksize = 8;
+                break;
+            }
+#endif /* ARM_COMPUTE_ENABLE_SVE */
+            case WeightFormat::OHWIo4:
+            {
+                ksize = 4;
+                break;
+            }
+            default:
+            {
+                ARM_COMPUTE_RETURN_ERROR_MSG("Unsupported weight format.");
+                break;
+            }
+        }
+
+        // output k_dim needs to be same as input but multiple of ksize
+        int32_t rnd_up_input_kdim = arm_compute::ceil_to_multiple(input_k_dim, ksize);
+        ARM_COMPUTE_RETURN_ERROR_ON(rnd_up_input_kdim != output_k_dim);
+        // output x_dim needs to be same as input
+        ARM_COMPUTE_RETURN_ERROR_ON(input_x_dim != output_x_dim);
+
+    }
+    return Status{};
+}
+
+} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEReorderKernel.h b/src/core/NEON/kernels/NEReorderKernel.h
new file mode 100644
index 0000000000..e744ff9e52
--- /dev/null
+++ b/src/core/NEON/kernels/NEReorderKernel.h
@@ -0,0 +1,93 @@
+/*
+ * 
Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_CORE_NEON_KERNELS_NEREORDERKERNEL
+#define ACL_SRC_CORE_NEON_KERNELS_NEREORDERKERNEL
+
+#include "src/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+
+/** Interface kernel to reorder a tensor into a blocked weight format, processing _ksize rows per window step. */
+class NEReorderKernel : public INEKernel
+{
+public:
+
+    const char *name() const override
+    {
+        return "NEReorderKernel";
+    }
+
+    /** Default constructor */
+    NEReorderKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReorderKernel(const NEReorderKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReorderKernel &operator=(const NEReorderKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEReorderKernel(NEReorderKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEReorderKernel &operator=(NEReorderKernel &&) = default;
+    /** Default destructor */
+    ~NEReorderKernel() = default;
+
+    /** Initialise the kernel's input and outputs.
+     *
+     * @param[in]  input     Source tensor with 2 or 4 dimensions. Data types supported: F32.
+     * @param[out] output    Destination tensor. Data type supported: same as @p input. Shape same as @p input except the last dimension, which needs to be a multiple of the blocking parameter _ksize.
+     * @param[in]  input_wf  WeightFormat of input. Only OHWI is supported.
+     * @param[in]  output_wf WeightFormat of output. Must be a blocked format handled by the kernel (OHWIo4; OHWIo8 needs a build with ARM_COMPUTE_ENABLE_SVE to be configured).
+     */
+    void configure(const ITensor *input, ITensor *output, arm_compute::WeightFormat input_wf, arm_compute::WeightFormat output_wf);
+
+    /** Static function to check if given info will lead to a valid configuration of @ref NEReorderKernel
+     *
+     * @param[in] input     Source tensor with 2 or 4 dimensions. Data types supported: F32.
+     * @param[in] output    Destination tensor. Data type supported: same as @p input. Shape same as @p input except the last dimension, which needs to be a multiple of the blocking parameter _ksize.
+     * @param[in] input_wf  WeightFormat of input. Only OHWI is supported.
+     * @param[in] output_wf WeightFormat of output. Must be a blocked format handled by the kernel (OHWIo4; OHWIo8 needs a build with ARM_COMPUTE_ENABLE_SVE to be configured).
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, arm_compute::WeightFormat input_wf, arm_compute::WeightFormat output_wf);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+
+
+/*****************************************************************************/
+
+private:
+    const ITensor *_input{nullptr};  // Input tensor
+    ITensor *_output{nullptr};  // Output tensor
+    int32_t  _ksize{0};  // Blocking parameter, how many rows kernel reorders on each call (4 for OHWIo4, 8 for OHWIo8; set in configure)
+    int32_t  _kmax{0};   // Rows in input tensor
+    int32_t  _xmax{0};   // Columns in input tensor
+    WeightFormat _input_wf{WeightFormat::UNSPECIFIED};  // WeightFormat of input tensor. NOTE(review): the constructor's init list sets WeightFormat::ANY, which overrides this default
+    WeightFormat _output_wf{WeightFormat::UNSPECIFIED};  // WeightFormat of output tensor. NOTE(review): the constructor's init list sets WeightFormat::ANY, which overrides this default
+};
+
+} // namespace arm_compute
+#endif /* ACL_SRC_CORE_NEON_KERNELS_NEREORDERKERNEL */
-- 
cgit v1.2.1