COMPMID-3520: Move ndrange.hpp header from arm_gemm to assembly

Change-Id: I6352a520ce38230cdfbad346b176cb659ab242a7 Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3327 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2020-06-09 14:52:15 +0100
committer: Michele Di Giorgio <michele.digiorgio@arm.com> 2020-06-17 15:33:51 +0000
commit: 6ad60af32af672f27e152bf37790cd0c0c4db696 (patch)
tree: 43fb0f8926d30801ef1355676545297c82ae248a /src/core/NEON/kernels/assembly/arm_gemm.hpp
parent: 1fd2c80692ed8ecefc4d8deb783564ad19eaf70c (diff)
download: ComputeLibrary-6ad60af32af672f27e152bf37790cd0c0c4db696.tar.gz
1 files changed, 176 insertions, 0 deletions
diff --git a/src/core/NEON/kernels/assembly/arm_gemm.hpp b/src/core/NEON/kernels/assembly/arm_gemm.hpp
new file mode 100644
index 0000000000..7723224ec8
--- /dev/null
+++ b/src/core/NEON/kernels/assembly/arm_gemm.hpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2018-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#pragma once
+
+#include <memory>
+#include <cstring>
+
+#include "arm_gemm_local.hpp"
+#include "gemm_common.hpp"
+
+namespace arm_gemm {
+
+enum class GemmMethod
+{
+    DEFAULT,
+    GEMV_BATCHED,
+    GEMV_PRETRANSPOSED,
+    GEMV_NATIVE_TRANSPOSED,
+    GEMM_NATIVE,
+    GEMM_HYBRID,
+    GEMM_INTERLEAVED,
+    GEMM_INTERLEAVED_2D,
+    QUANTIZE_WRAPPER,
+    GEMM_HYBRID_QUANTIZED
+};
+
+struct KernelDescription
+{
+    GemmMethod   method      = GemmMethod::DEFAULT;
+    std::string  name        = "";
+    bool         is_default  = false;
+
+    KernelDescription(GemmMethod m, std::string n, bool d=false) : method(m), name(n), is_default(d) { }
+    KernelDescription() noexcept  { }
+};
+
+struct GemmConfig
+{
+    GemmMethod   method           = GemmMethod::DEFAULT;
+    std::string  filter           = "";
+    unsigned int inner_block_size = 0;
+    unsigned int outer_block_size = 0;
+
+    GemmConfig(GemmMethod method) : method(method) { }
+    GemmConfig() { }
+};
+
+struct Activation
+{
+    enum class Type {
+        None,
+        ReLU,
+        BoundedReLU
+    };
+
+    Type    type;
+    float   param1;
+    float   param2;
+
+    Activation(Type type=Type::None, float p1=0.0f, float p2=0.0f) : type(type), param1(p1), param2(p2) { }
+};
+
+struct GemmArgs
+{
+public:
+    const CPUInfo    *_ci;
+    unsigned int      _Msize;
+    unsigned int      _Nsize;
+    unsigned int      _Ksize;
+    unsigned int      _nbatches;
+    unsigned int      _nmulti;
+    bool              _trA;
+    bool              _trB;
+    Activation        _act;
+    int               _maxthreads;
+    bool              _pretransposed_hint;
+    const GemmConfig *_cfg;
+
+    GemmArgs(const CPUInfo *ci, const unsigned int M, const unsigned int N,
+             const unsigned int K, const unsigned int nbatches,
+             const unsigned int nmulti, const bool trA, const bool trB,
+             Activation act, const int maxthreads,
+             const bool pretransposed_hint, const GemmConfig *cfg=nullptr ) :
+             _ci(ci), _Msize(M), _Nsize(N), _Ksize(K), _nbatches(nbatches), _nmulti(nmulti),
+             _trA(trA), _trB(trB), _act(act), _maxthreads(maxthreads),
+             _pretransposed_hint(pretransposed_hint), _cfg(cfg)
+    {
+    }
+};
+
+struct Requantize32
+{
+public:
+    const int32_t  *bias = nullptr;
+    size_t          bias_multi_stride = 0;
+    int32_t         a_offset = 0;
+    int32_t         b_offset = 0;
+    int32_t         c_offset = 0;
+    bool            per_channel_requant = false;
+    int32_t         per_layer_shift = 0;
+    int32_t         per_layer_mul = 0;
+    const int32_t  *per_channel_shifts = nullptr;
+    const int32_t  *per_channel_muls = nullptr;
+    int32_t         minval = 0;
+    int32_t         maxval = 0;
+
+    Requantize32() = default;
+
+    // Constructor for per-tensor quantization
+    Requantize32(const int32_t *bias, size_t bias_multi_stride,
+                 int32_t a_offset, int32_t b_offset, int32_t c_offset,
+                 int32_t requant_shift, int32_t requant_mul,
+                 int32_t minv, int32_t maxv) :
+        bias(bias), bias_multi_stride(bias_multi_stride),
+        a_offset(a_offset), b_offset(b_offset), c_offset(c_offset),
+        per_channel_requant(false), per_layer_shift(requant_shift), per_layer_mul(requant_mul),
+        minval(minv), maxval(maxv)
+    {
+    }
+
+    // Constructor for per-channel quantization
+    Requantize32(const int32_t *bias, size_t bias_multi_stride,
+                 int32_t a_offset, int32_t b_offset, int32_t c_offset,
+                 const int32_t *requant_shifts, const int32_t *requant_muls,
+                 int32_t minv, int32_t maxv) :
+        bias(bias), bias_multi_stride(bias_multi_stride),
+        a_offset(a_offset), b_offset(b_offset), c_offset(c_offset),
+        per_channel_requant(true), per_channel_shifts(requant_shifts), per_channel_muls(requant_muls),
+        minval(minv), maxval(maxv)
+    {
+    }
+};
+
+struct Nothing
+{
+};
+
+template<typename Top, typename Tret>
+using UniqueGemmCommon = std::unique_ptr<GemmCommon<Top, Tret> >;
+
+/* Low level API calls.
+ * These are implemented as 'GemmArgs' versions, or with the arguments explicitly listed. */
+
+/* get_gemm_method(): Given the templated types and provided parameters,
+ * which is the preferred method to implement this GEMM?  */
+template<typename Top, typename Tret, class OutputStage = Nothing>
+KernelDescription get_gemm_method(const GemmArgs &args, const OutputStage & ={});
+
+template<typename Top, typename Tret, class OutputStage = Nothing>
+UniqueGemmCommon<Top, Tret> gemm(const GemmArgs &args, const OutputStage & ={});
+
+template<typename Top, typename Tret, class OutputStage = Nothing>
+std::vector<KernelDescription> get_compatible_kernels(const GemmArgs &args, const OutputStage & ={});
+
+} // namespace arm_gemm
author	Michele Di Giorgio <michele.digiorgio@arm.com>	2020-06-09 14:52:15 +0100
committer	Michele Di Giorgio <michele.digiorgio@arm.com>	2020-06-17 15:33:51 +0000
commit	6ad60af32af672f27e152bf37790cd0c0c4db696 (patch)
tree	43fb0f8926d30801ef1355676545297c82ae248a /src/core/NEON/kernels/assembly/arm_gemm.hpp
parent	1fd2c80692ed8ecefc4d8deb783564ad19eaf70c (diff)
download	ComputeLibrary-6ad60af32af672f27e152bf37790cd0c0c4db696.tar.gz