COMPMID-3776: Indirect GEMM

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I51a1b0f098bc3a8c408c50c92221e4df3061e12c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4343 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2020-11-02 01:37:17 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2020-11-12 15:59:25 +0000
commit: c0b6f76561580414f08633a804fc548ccad65659 (patch)
tree: 4d46b7f479de04f799e29095392948aeb370c029 /src/core/NEON/kernels/assembly
parent: 824061d9910ebb42cbe46b677c0b843db212c9a2 (diff)
download: ComputeLibrary-c0b6f76561580414f08633a804fc548ccad65659.tar.gz
5 files changed, 101 insertions, 205 deletions
diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp
deleted file mode 100644
index 760274dba1..0000000000
--- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/WindowIterator.h"
-
-using namespace arm_compute;
-
-INEGEMMWrapperKernel::INEGEMMWrapperKernel()
-    : _a(nullptr), _b(nullptr), _c(nullptr), _params(), _gemm_info(), _window3d(), _window_shape()
-{
-}
-
-INEGEMMWrapperKernel::Params INEGEMMWrapperKernel::extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info)
-{
-    Params p;
-
-    ARM_COMPUTE_ERROR_ON_NULLPTR(a);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(b);
-    ARM_COMPUTE_ERROR_ON_NULLPTR(c);
-
-    // Initalize params
-    p.M       = c->info()->tensor_shape().y();
-    p.N       = c->info()->tensor_shape().x();
-    p.K       = a->info()->tensor_shape().x();
-    p.multis  = b->info()->tensor_shape().z();
-    p.batches = c->info()->tensor_shape().total_size_upper(2) / p.multis; //COMPMID-1423: Agree on and document the layout of gemm inputs/outputs
-
-    // Update M in case of GEMM3D for output
-    if(gemm_info.depth_output_gemm3d() != 0)
-    {
-        p.M       = c->info()->tensor_shape().y() * c->info()->tensor_shape().z();
-        p.batches = c->info()->tensor_shape().total_size_upper(3) / p.multis;
-    }
-
-    return p;
-}
-
-void INEGEMMWrapperKernel::configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info)
-{
-    _gemm_info = gemm_info;
-    _params    = extract_parameters(a, b, c, gemm_info);
-    _a         = a;
-    _b         = b;
-    _c         = c;
-
-    _window3d     = configure_internal(alpha, beta);
-    _window_shape = _window3d.shape();
-
-    // Convert the 3D window into a 1D window in order to allow the scheduler to arbitrary split it.
-    Window collapsed;
-    collapsed.set(0, Window::Dimension(0, _window3d.num_iterations_total()));
-
-    INEKernel::configure(collapsed);
-}
-
-void INEGEMMWrapperKernel::run(const Window &window, const ThreadInfo &info)
-{
-    const Coordinates start_offset = index2coords(_window_shape, window.x().start());
-    const Coordinates end_offset   = index2coords(_window_shape, window.x().end() - 1);
-
-    run_internal(_window3d, start_offset, end_offset, info);
-}
diff --git a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h b/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h
deleted file mode 100644
index 92c013260b..0000000000
--- a/src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef SRC_INEGEMMWRAPPERKERNEL_H
-#define SRC_INEGEMMWRAPPERKERNEL_H
-
-#include "src/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Common interface for all the arm_gemm Gemms
- */
-class INEGEMMWrapperKernel : public INEKernel
-{
-public:
-    /** Parameters defining the dimensions of the matrices being multiplied */
-    struct Params
-    {
-        unsigned int M{ 0 };       /**< Rows in output matrix C (and input matrix A). */
-        unsigned int N{ 0 };       /**< Columns in output matrix C (and input matrix B). */
-        unsigned int K{ 0 };       /**< Columns of input matrix A (= rows of input matrix B). */
-        unsigned int batches{ 0 }; /**< Number of "batched" GEMMs (unique A and C, shared B). */
-        unsigned int multis{ 0 };  /**< Number of "multi" GEMMs (unique A, B and C). */
-    };
-
-    static Params extract_parameters(const ITensor *a, const ITensor *b, const ITensor *c, const GEMMInfo &gemm_info);
-
-    /** Constructor */
-    INEGEMMWrapperKernel();
-    /** Prevent instances of this class from being copied */
-    INEGEMMWrapperKernel(const INEGEMMWrapperKernel &) = delete;
-    /** Prevent instances of this class from being copied */
-    INEGEMMWrapperKernel &operator=(const INEGEMMWrapperKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    INEGEMMWrapperKernel(INEGEMMWrapperKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    INEGEMMWrapperKernel &operator=(INEGEMMWrapperKernel &&) = default;
-    /** Initialise the kernel's input and output.
-     *
-     * @note The input and output tensor must have the same dimensions
-     *
-     * @param[in]  a         Input tensor (Matrix A)
-     * @param[in]  b         Input tensor (Matrix B)
-     * @param[out] c         Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.
-     * @param[in]  alpha     Scalar multiplier to apply to AB matrix product.
-     * @param[in]  beta      Scalar multiplier to apply to input C matrix before adding product.
-     * @param[in]  gemm_info GEMM meta-data
-     */
-    void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-protected:
-    /** Called as part of configure() after _a, _b, _c and _params have been set.
-     *
-     * @param[in] alpha Scalar multiplier to apply to AB matrix product.
-     * @param[in] beta  Scalar multiplier to apply to input C matrix before adding product.
-     *
-     * @return A 3D execution window.
-     */
-    virtual Window configure_internal(float alpha, float beta) = 0;
-
-    /** Run the kernel from the start to the end offset in window.
-     *
-     * @param[in] window       Window to use for the iteration
-     * @param[in] start_offset Where to start iterating from (In Window coordinates)
-     * @param[in] end_offset   Where to stop iterating (In Window coordinates).
-     * @param[in] info         Info about executing thread and CPU.
-     */
-    virtual void run_internal(const Window &window, const Coordinates &start_offset, const Coordinates &end_offset, const ThreadInfo &info) = 0;
-
-    const ITensor *_a;
-    const ITensor *_b;
-    ITensor       *_c;
-    Params         _params;
-    GEMMInfo       _gemm_info;
-
-private:
-    Window      _window3d;
-    TensorShape _window_shape;
-};
-
-} // namespace arm_compute
-
-#endif /* SRC_INEGEMMRAPPERKERNEL_H */
diff --git a/src/core/NEON/kernels/assembly/arm_gemm.hpp b/src/core/NEON/kernels/assembly/arm_gemm.hpp
index f6421c12ab..3088b080d6 100644
--- a/src/core/NEON/kernels/assembly/arm_gemm.hpp
+++ b/src/core/NEON/kernels/assembly/arm_gemm.hpp
@@ -43,7 +43,9 @@ enum class GemmMethod
     GEMM_INTERLEAVED_2D,
     QUANTIZE_WRAPPER,
     QUANTIZE_WRAPPER_2D,
-    GEMM_HYBRID_QUANTIZED
+    GEMM_HYBRID_QUANTIZED,
+    INDIRECT_GEMM,
+    CONVOLUTION_GEMM
 };
 
 struct KernelDescription
@@ -104,17 +106,19 @@ public:
     unsigned int      _Msize;
     unsigned int      _Nsize;
     unsigned int      _Ksize;
+    unsigned int      _Ksections;
     unsigned int      _nbatches;
     unsigned int      _nmulti;
+    bool              _indirect_input;
     Activation        _act;
     int               _maxthreads;
     const GemmConfig *_cfg;
 
-    GemmArgs(const CPUInfo *ci, const unsigned int M, const unsigned int N,
-             const unsigned int K, const unsigned int nbatches,
-             const unsigned int nmulti, Activation act, const int maxthreads,
+    GemmArgs(const CPUInfo *ci, unsigned int M, unsigned int N,
+             unsigned int K, unsigned int Ksections, unsigned int nbatches,
+             unsigned int nmulti, bool indirect_input, Activation act, const int maxthreads,
              const GemmConfig *cfg = nullptr)
-        : _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _nbatches(nbatches), _nmulti(nmulti), _act(act), _maxthreads(maxthreads), _cfg(cfg)
+        : _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _Ksections(Ksections), _nbatches(nbatches), _nmulti(nmulti), _indirect_input(indirect_input), _act(act), _maxthreads(maxthreads), _cfg(cfg)
     {
     }
 };
@@ -143,8 +147,8 @@ public:
     Requantize32(const int32_t *bias, size_t bias_multi_stride,
                  int32_t a_offset, int32_t b_offset, int32_t c_offset,
                  int32_t requant_shift, int32_t requant_mul, int32_t minv, int32_t maxv)
-        : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(false), per_layer_left_shift(std::max(requant_shift, int32_t(0))),
-          per_layer_right_shift(std::min(requant_shift, int32_t(0))), per_layer_mul(requant_mul), minval(minv), maxval(maxv)
+        : bias(bias), bias_multi_stride(bias_multi_stride), a_offset(a_offset), b_offset(b_offset), c_offset(c_offset), per_channel_requant(false), per_layer_left_shift(std::max<int32_t>(requant_shift, 0)),
+          per_layer_right_shift(std::min<int32_t>(requant_shift, 0)), per_layer_mul(requant_mul), minval(minv), maxval(maxv)
     {
     }
 
diff --git a/src/core/NEON/kernels/assembly/convolution_parameters.hpp b/src/core/NEON/kernels/assembly/convolution_parameters.hpp
new file mode 100644
index 0000000000..d0ef5b539f
--- /dev/null
+++ b/src/core/NEON/kernels/assembly/convolution_parameters.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#pragma once
+
+#include <cstdint>
+
+namespace arm_gemm
+{
+/*
+ * Parameter set for "convolution" type GEMM.
+ *
+ * For a "convolution" GEMM, the GEMM parameters (M, K) are specified as if
+ * an im2row had been performed on the input tensor to generate the operand
+ * matrix, but instead this structure describes the convolution parameters
+ * such that this can be done on the fly.
+ *
+ * The parameters describe the convolution details - the notional shape of
+ * the input and output tensors, whether padding is to be applied, the size
+ * of the kernel and a constant value to be used for padding (needed for
+ * quantized tensors).
+ *
+ * The second part describes the layout of the input tensor in memory, which
+ * is assumed to be in NHWC format.  This consists of a base pointer and
+ * strides for columns, rows and batches.  'multis' are not supported for
+ * convolution type GEMMs.
+ */
+struct ConvolutionParameters
+{
+    int64_t input_width;
+    int64_t input_height;
+    int64_t input_channels;
+    int64_t kernel_width;
+    int64_t kernel_height;
+    int64_t output_width;
+    int64_t output_height;
+    int64_t output_stride_w;
+    int64_t output_stride_h;
+    //          output_channels not included as they do not affect the input.
+    int64_t padding_top;
+    int64_t padding_left;
+    float   padding_value;
+};
+
+} // namespace arm_gemm
diff --git a/src/core/NEON/kernels/assembly/gemm_common.hpp b/src/core/NEON/kernels/assembly/gemm_common.hpp
index e9e56842c7..e1fb7a45a8 100644
--- a/src/core/NEON/kernels/assembly/gemm_common.hpp
+++ b/src/core/NEON/kernels/assembly/gemm_common.hpp
@@ -23,6 +23,7 @@
  */
 #pragma once
 
+#include "convolution_parameters.hpp"
 #include "ndrange.hpp"
 
 #include <cstddef>
@@ -77,7 +78,7 @@ public:
         return false;
     }
 
-    /** Main execute member function
+    /** Main execute member function
      * @param [in] work_range     specifies the range of work we want to be computed, total range defined by get_window_size()
      * @param [in] thread_locator where are we inside of the thread space
      * @naram [in] threadid       a unique threadid
@@ -123,6 +124,19 @@ public:
     {
     }
 
+    /*** Indirect interface (optional) ***/
+    /* Set the indirect table.  This comprises a number of values per kernel point, and a densely packed array of pointers,
+     * multis * batches * kernel_points */
+    virtual void set_indirect_parameters_generic(size_t, const void *const *const *)
+    {
+    }
+
+    /*** Convolution interface (optional) ***/
+    /* Set the convolution parameters. */
+    virtual void set_convolution_parameters(ConvolutionParameters)
+    {
+    }
+
     // Destructor
     virtual ~IGemmCommon()
     {
@@ -200,6 +214,16 @@ public:
     {
         pretranspose_B_array(out, static_cast<const To *>(in), row_stride, multi_stride);
     }
+
+    /*** Indirect interface ***/
+    virtual void set_indirect_parameters(size_t, const To *const *const *)
+    {
+    }
+
+    void set_indirect_parameters_generic(size_t sz, const void *const *const *ptr) override
+    {
+        set_indirect_parameters(sz, reinterpret_cast<const To *const *const *>(ptr));
+    }
 };
 
 } // namespace arm_gemm
author	Georgios Pinitas <georgios.pinitas@arm.com>	2020-11-02 01:37:17 +0000
committer	Georgios Pinitas <georgios.pinitas@arm.com>	2020-11-12 15:59:25 +0000
commit	c0b6f76561580414f08633a804fc548ccad65659 (patch)
tree	4d46b7f479de04f799e29095392948aeb370c029 /src/core/NEON/kernels/assembly
parent	824061d9910ebb42cbe46b677c0b843db212c9a2 (diff)
download	ComputeLibrary-c0b6f76561580414f08633a804fc548ccad65659.tar.gz