diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-11-02 01:37:17 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-11-12 15:59:25 +0000 |
commit | c0b6f76561580414f08633a804fc548ccad65659 (patch) | |
tree | 4d46b7f479de04f799e29095392948aeb370c029 /src/core/NEON/kernels/assembly | |
parent | 824061d9910ebb42cbe46b677c0b843db212c9a2 (diff) | |
download | ComputeLibrary-c0b6f76561580414f08633a804fc548ccad65659.tar.gz |
COMPMID-3776: Indirect GEMM
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I51a1b0f098bc3a8c408c50c92221e4df3061e12c
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4343
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/assembly')
-rw-r--r-- | src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp | 89 | ||||
-rw-r--r-- | src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h | 108 | ||||
-rw-r--r-- | src/core/NEON/kernels/assembly/arm_gemm.hpp | 18 | ||||
-rw-r--r-- | src/core/NEON/kernels/assembly/convolution_parameters.hpp | 65 | ||||
-rw-r--r-- | src/core/NEON/kernels/assembly/gemm_common.hpp | 26 |
5 files changed, 101 insertions, 205 deletions
diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp deleted file mode 100644 index 760274dba1..0000000000 --- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/WindowIterator.h" - -using namespace arm_compute; - -INEGEMMWrapperKernel::INEGEMMWrapperKernel() - : _a(nullptr), _b(nullptr), _c(nullptr), _params(), _gemm_info(), _window3d(), _window_shape() -{ -} - -INEGEMMWrapperKernel::Params INEGEMMWrapperKernel::extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info) -{ - Params p; - - ARM_COMPUTE_ERROR_ON_NULLPTR(a); - ARM_COMPUTE_ERROR_ON_NULLPTR(b); - ARM_COMPUTE_ERROR_ON_NULLPTR(c); - - // Initalize params - p.M = c->info()->tensor_shape().y(); - p.N = c->info()->tensor_shape().x(); - p.K = a->info()->tensor_shape().x(); - p.multis = b->info()->tensor_shape().z(); - p.batches = c->info()->tensor_shape().total_size_upper(2) / p.multis; //COMPMID-1423: Agree on and document the layout of gemm inputs/outputs - - // Update M in case of GEMM3D for output - if(gemm_info.depth_output_gemm3d() != 0) - { - p.M = c->info()->tensor_shape().y() * c->info()->tensor_shape().z(); - p.batches = c->info()->tensor_shape().total_size_upper(3) / p.multis; - } - - return p; -} - -void INEGEMMWrapperKernel::configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info) -{ - _gemm_info = gemm_info; - _params = extract_parameters(a, b, c, gemm_info); - _a = a; - _b = b; - _c = c; - - _window3d = configure_internal(alpha, beta); - _window_shape = _window3d.shape(); - - // Convert the 3D window into a 1D window in order to allow the scheduler to arbitrary split it. - Window collapsed; - collapsed.set(0, Window::Dimension(0, _window3d.num_iterations_total())); - - INEKernel::configure(collapsed); -} - -void INEGEMMWrapperKernel::run(const Window &window, const ThreadInfo &info) -{ - const Coordinates start_offset = index2coords(_window_shape, window.x().start()); - const Coordinates end_offset = index2coords(_window_shape, window.x().end() - 1); - - run_internal(_window3d, start_offset, end_offset, info); -} diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h deleted file mode 100644 index 92c013260b..0000000000 --- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_INEGEMMWRAPPERKERNEL_H -#define SRC_INEGEMMWRAPPERKERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Common interface for all the arm_gemm Gemms - */ -class INEGEMMWrapperKernel : public INEKernel -{ -public: - /** Parameters defining the dimensions of the matrices being multiplied */ - struct Params - { - unsigned int M{ 0 }; /**< Rows in output matrix C (and input matrix A). */ - unsigned int N{ 0 }; /**< Columns in output matrix C (and input matrix B). */ - unsigned int K{ 0 }; /**< Columns of input matrix A (= rows of input matrix B). */ - unsigned int batches{ 0 }; /**< Number of "batched" GEMMs (unique A and C, shared B). */ - unsigned int multis{ 0 }; /**< Number of "multi" GEMMs (unique A, B and C). */ - }; - - static Params extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info); - - /** Constructor */ - INEGEMMWrapperKernel(); - /** Prevent instances of this class from being copied */ - INEGEMMWrapperKernel(const INEGEMMWrapperKernel &) = delete; - /** Prevent instances of this class from being copied */ - INEGEMMWrapperKernel &operator=(const INEGEMMWrapperKernel &) = delete; - /** Allow instances of this class to be moved */ - INEGEMMWrapperKernel(INEGEMMWrapperKernel &&) = default; - /** Allow instances of this class to be moved */ - INEGEMMWrapperKernel &operator=(INEGEMMWrapperKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] a Input tensor (Matrix A) - * @param[in] b Input tensor (Matrix B) - * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Scalar multiplier to apply to AB matrix product. - * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. - * @param[in] gemm_info GEMM meta-data - */ - void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - /** Called as part of configure() after _a, _b, _c and _params have been set. - * - * @param[in] alpha Scalar multiplier to apply to AB matrix product. - * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. - * - * @return A 3D execution window. - */ - virtual Window configure_internal(float alpha, float beta) = 0; - - /** Run the kernel from the start to the end offset in window. - * - * @param[in] window Window to use for the iteration - * @param[in] start_offset Where to start iterating from (In Window coordinates) - * @param[in] end_offset Where to stop iterating (In Window coordinates). - * @param[in] info Info about executing thread and CPU. - */ - virtual void run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) = 0; - - const ITensor *_a; - const ITensor *_b; - ITensor *_c; - Params _params; - GEMMInfo _gemm_info; - -private: - Window _window3d; - TensorShape _window_shape; -}; - -} // namespace arm_compute - -#endif /* SRC_INEGEMMRAPPERKERNEL_H */ diff --git a/src/core/NEON/kernels/assembly/arm_gemm.hpp b/src/core/NEON/kernels/assembly/arm_gemm.hpp index f6421c12ab..3088b080d6 100644 --- a/src/core/NEON/kernels/assembly/arm_gemm.hpp +++ b/src/core/NEON/kernels/assembly/arm_gemm.hpp @@ -43,7 +43,9 @@ enum class GemmMethod GEMM_INTERLEAVED_2D, QUANTIZE_WRAPPER, QUANTIZE_WRAPPER_2D, - GEMM_HYBRID_QUANTIZED + GEMM_HYBRID_QUANTIZED, + INDIRECT_GEMM, + CONVOLUTION_GEMM }; struct KernelDescription @@ -104,17 +106,19 @@ public: unsigned int _Msize; unsigned int _Nsize; unsigned int _Ksize; + unsigned int _Ksections; unsigned int _nbatches; unsigned int _nmulti; + bool _indirect_input; Activation _act; int _maxthreads; const GemmConfig *_cfg; - GemmArgs(const CPUInfo *ci, const unsigned int M, const unsigned int N, - const unsigned int K, const unsigned int nbatches, - const unsigned int nmulti, Activation act, const int maxthreads, + GemmArgs(const CPUInfo *ci, unsigned int M, unsigned int N, + unsigned int K, unsigned int Ksections, unsigned int nbatches, + unsigned int nmulti, bool indirect_input, Activation act, const int maxthreads, const GemmConfig *cfg = nullptr) - : _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _nbatches(nbatches), _nmulti(nmulti), _act(act), _maxthreads(maxthreads), _cfg(cfg) + : _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _Ksections(Ksections), _nbatches(nbatches), _nmulti(nmulti), _indirect_input(indirect_input), _act(act), _maxthreads(maxthreads), _cfg(cfg) { } }; @@ -143,8 +147,8 @@ public: Requantize32(const int32_t *bias, size_t bias_multi_stride, int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t requant_shift, int32_t requant_mul, int32_t minv, int32_t maxv) - : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(false), per_layer_left_shift(std::max(requant_shift, int32_t(0))), - per_layer_right_shift(std::min(requant_shift, int32_t(0))), per_layer_mul(requant_mul), minval(minv), maxval(maxv) + : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(false), per_layer_left_shift(std::max<int32_t>(requant_shift, 0)), + per_layer_right_shift(std::min<int32_t>(requant_shift, 0)), per_layer_mul(requant_mul), minval(minv), maxval(maxv) { } diff --git a/src/core/NEON/kernels/assembly/convolution_parameters.hpp b/src/core/NEON/kernels/assembly/convolution_parameters.hpp new file mode 100644 index 0000000000..d0ef5b539f --- /dev/null +++ b/src/core/NEON/kernels/assembly/convolution_parameters.hpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +#include <cstdint> + +namespace arm_gemm +{ +/* + * Parameter set for "convolution" type GEMM. + * + * For a "convolution" GEMM, the GEMM parameters (M, K) are specified as if + * an im2row had been performed on the input tensor to generate the operand + * matrix, but instead this structure describes the convolution parameters + * such that this can be done on the fly. + * + * The parameters describe the convolution details - the notional shape of + * the input and output tensors, whether padding is to be applied, the size + * of the kernel and a constant value to be used for padding (needed for + * quantized tensors). + * + * The second part describes the layout of the input tensor in memory, which + * is assumed to be in NHWC format. This consists of a base pointer and + * strides for columns, rows and batches. 'multis' are not supported for + * convolution type GEMMs. + */ +struct ConvolutionParameters +{ + int64_t input_width; + int64_t input_height; + int64_t input_channels; + int64_t kernel_width; + int64_t kernel_height; + int64_t output_width; + int64_t output_height; + int64_t output_stride_w; + int64_t output_stride_h; + // output_channels not included as they do not affect the input. + int64_t padding_top; + int64_t padding_left; + float padding_value; +}; + +} // namespace arm_gemm diff --git a/src/core/NEON/kernels/assembly/gemm_common.hpp b/src/core/NEON/kernels/assembly/gemm_common.hpp index e9e56842c7..e1fb7a45a8 100644 --- a/src/core/NEON/kernels/assembly/gemm_common.hpp +++ b/src/core/NEON/kernels/assembly/gemm_common.hpp @@ -23,6 +23,7 @@ */ #pragma once +#include "convolution_parameters.hpp" #include "ndrange.hpp" #include <cstddef> @@ -77,7 +78,7 @@ public: return false; } - /** Main execute member function + /** Main execute member fucntion * @param [in] work_range specifies the range of work we want to be computed, total range defined by get_window_size() * @param [in] thread_locator where are we inside of the thread space * @naram [in] threadid a unique threadid @@ -123,6 +124,19 @@ public: { } + /*** Indirect interface (optional) ***/ + /* Set the indirect table. This comprises a number of values per kernel point, and a densely packed array of pointers, + * multis * batches * kernel_points */ + virtual void set_indirect_parameters_generic(size_t, const void *const *const *) + { + } + + /*** Convolution interface (optional) ***/ + /* Set the convolution parameters. */ + virtual void set_convolution_parameters(ConvolutionParameters) + { + } + // Destructor virtual ~IGemmCommon() { @@ -200,6 +214,16 @@ public: { pretranspose_B_array(out, static_cast<const To *>(in), row_stride, multi_stride); } + + /*** Indirect interface ***/ + virtual void set_indirect_parameters(size_t, const To *const *const *) + { + } + + void set_indirect_parameters_generic(size_t sz, const void *const *const *ptr) override + { + set_indirect_parameters(sz, reinterpret_cast<const To *const *const *>(ptr)); + } }; } // namespace arm_gemm |