aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/core
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/core')
-rw-r--r--arm_compute/core/CPP/ICPPKernel.h22
-rw-r--r--arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h35
-rw-r--r--arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp3
-rw-r--r--arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp121
-rw-r--r--arm_compute/core/NEON/kernels/assembly/gemm_common.hpp40
5 files changed, 200 insertions, 21 deletions
diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h
index f41567ee11..ec05af20bd 100644
--- a/arm_compute/core/CPP/ICPPKernel.h
+++ b/arm_compute/core/CPP/ICPPKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,7 +49,25 @@ public:
* @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window())
* @param[in] info Info about executing thread and CPU.
*/
- virtual void run(const Window &window, const ThreadInfo &info) = 0;
+ virtual void run(const Window &window, const ThreadInfo &info)
+ {
+ ARM_COMPUTE_UNUSED(window); // both arguments are deliberately ignored by this default body
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR("default implementation of legacy run() virtual member function invoked"); // run() is no longer pure virtual; reaching this body is flagged through ARM_COMPUTE_ERROR
+ }
+
+ /** legacy compatibility layer for implementations which do not support thread_locator
+ * In these cases we simply narrow the interface down to the legacy version:
+ * the default body ignores thread_locator and forwards to run(window, info).
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window())
+ * @param[in] info Info about executing thread and CPU.
+ * @param[in] thread_locator Specifies "where" the current thread is in the multi-dimensional space
+ */
+ virtual void run_nd(const Window &window, const ThreadInfo &info, const Window &thread_locator)
+ {
+ ARM_COMPUTE_UNUSED(thread_locator);
+ run(window, info);
+ }
/** Name of the kernel
*
diff --git a/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h b/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h
index d612681c41..0e3dd74577 100644
--- a/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h
+++ b/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H
#define ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H
+#include "arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp"
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
@@ -65,15 +66,33 @@ public:
{
return _name.c_str();
}
- // Inherited methods overridden:
+
+
void run(const Window &window, const ThreadInfo &info) override
{
ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- auto first = window.x().start();
- auto last = window.x().end();
- _kernel->execute(first, last, info.thread_id);
+
+ auto win=arm_gemm::to_ndcoord(window); // translate the arm_compute window into arm_gemm's coordinate type
+
+ arm_gemm::ndcoord_t thread_locator { }; // this overload receives no thread location, so an empty-initialised one is passed on
+
+ _kernel->execute(win, thread_locator, info.thread_id);
}
+
+ // Inherited methods overridden:
+ void run_nd(const Window &window, const ThreadInfo &info, const Window &thread_locator) override
+ {
+     ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel)));
+     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+
+     // The assembly kernel speaks arm_gemm types, so translate both windows before dispatching.
+     const arm_gemm::ndcoord_t work_range = arm_gemm::to_ndcoord(window);
+     const arm_gemm::ndcoord_t locator    = arm_gemm::to_ndcoord(thread_locator);
+
+     _kernel->execute(work_range, locator, info.thread_id);
+ }
+
/** Initialise the kernel's input and output.
*
* @param[in] kernel Pointer to an assembly kernel implementation.
@@ -83,9 +102,9 @@ public:
{
ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel)));
_kernel = kernel;
- auto win_last = _kernel->get_window_size();
- Window win;
- win.set(Window::DimX, Window::Dimension(0, win_last, 1));
+
+ Window win = to_window(kernel->get_window_size());
+
INEKernel::configure(win);
if(!kernel_name_tag.empty())
diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp
index e89523981d..7723224ec8 100644
--- a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,7 @@ enum class GemmMethod
GEMM_NATIVE,
GEMM_HYBRID,
GEMM_INTERLEAVED,
+ GEMM_INTERLEAVED_2D,
QUANTIZE_WRAPPER,
GEMM_HYBRID_QUANTIZED
};
diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp
new file mode 100644
index 0000000000..7dff01003d
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#pragma once
+
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/Dimensions.h"
+#include "src/core/NEON/kernels/arm_gemm/ndrange.hpp"
+
+#include <cassert>
+
+/* This file contains mapping between integral types used in arm_compute and arm_gemm
+ * These two codebases both require a degree of separation for the sake of modularity
+ * so maintain their own types which represent similar information.
+ */
+
+namespace arm_gemm {
+
+//we want to unify the maximum number of dimensions used between arm_gemm and arm compute library
+constexpr std::size_t ndrange_max =
+ arm_compute::Dimensions<unsigned int>::num_max_dimensions;
+
+using ndrange_t=NDRange<ndrange_max>; // per-dimension sizes only
+using ndcoord_t=NDCoordinate<ndrange_max>; // per-dimension position plus size
+
+/** Converts an `arm_gemm::ndrange_t` to an `arm_compute::Window`
+ *
+ * `NDRange` does not encode start positions, so every dimension of the
+ * produced `arm_compute::Window` starts at zero.
+ *
+ * @param[in] ndr the `arm_gemm::ndrange_t` we wish to convert into an `arm_compute::Window`
+ * @return an `arm_compute::Window` whose dimension d runs from 0 to `ndr.get_size(d)`
+ */
+inline arm_compute::Window to_window(const ndrange_t& ndr) {
+    arm_compute::Window result;
+
+    unsigned int dim = 0;
+    while(dim != ndrange_max) {
+        // each window dimension covers [0, size of the matching NDRange dimension)
+        const arm_compute::Window::Dimension extent(0, ndr.get_size(dim));
+        result.set(dim, extent);
+        ++dim;
+    }
+
+    return result;
+}
+
+/** Converts an `arm_gemm::ndcoord_t` to an `arm_compute::Window`
+ *
+ * Each window dimension starts at the coordinate's position and ends at
+ * position + size for that dimension.
+ *
+ * @param[in] ndc the `arm_gemm::ndcoord_t` we wish to convert into an `arm_compute::Window`
+ * @return an `arm_compute::Window` representing the same dimensional ranges as `ndc`
+ */
+inline arm_compute::Window to_window(const ndcoord_t& ndc) {
+    arm_compute::Window result;
+
+    for(unsigned int dim = 0; dim < ndrange_max; ++dim) {
+        const auto first = ndc.get_position(dim);
+        const auto last  = first + ndc.get_size(dim);
+
+        result.set(dim, arm_compute::Window::Dimension(first, last));
+    }
+
+    return result;
+}
+
+/** Convert an `arm_compute::Window` to an `arm_gemm::ndrange_t` of the same max dimensions
+ *
+ * An `arm_compute::Window` dimension specifies a `start()` and an `end()`, whereas
+ * `arm_gemm::ndrange_t` only has a size, so each dimension is stored as `end() - start()`.
+ * Only `start()` and `end()` are read; nothing else about the window is carried over.
+ *
+ * @param[in] win the `arm_compute::Window` we want to convert to `arm_gemm::ndrange_t`
+ * @return the resultant ndrange_t
+ */
+inline ndrange_t to_ndrange(const arm_compute::Window& win) {
+    // The initialiser below names dimensions 0..5 explicitly. Fail the build instead of
+    // silently mis-converting if the shared dimension count ever stops being six.
+    static_assert(ndrange_max == 6, "to_ndrange() enumerates exactly six dimensions");
+
+    // size of a single window dimension
+    const auto extent = [&win](unsigned int dim) {
+        return static_cast<unsigned int>(win[dim].end() - win[dim].start());
+    };
+
+    return { extent(0), extent(1), extent(2), extent(3), extent(4), extent(5) };
+}
+
+/** Convert an `arm_compute::Window` to an `arm_gemm::ndcoord_t` of the same max dimensions
+ *
+ * Every dimension is converted to a { position, size } pair, where the position is the
+ * window's `start()` and the size is `end() - start()`.
+ *
+ * @param[in] win the `arm_compute::Window` we want to convert to `arm_gemm::ndcoord_t`
+ * @return the resultant ndcoord_t
+ */
+inline ndcoord_t to_ndcoord(const arm_compute::Window& win) {
+    // The initialiser below names dimensions 0..5 explicitly. Fail the build instead of
+    // silently mis-converting if the shared dimension count ever stops being six.
+    static_assert(ndrange_max == 6, "to_ndcoord() enumerates exactly six dimensions");
+
+    // start position of a single window dimension
+    const auto origin = [&win](unsigned int dim) {
+        return static_cast<unsigned int>(win[dim].start());
+    };
+    // size of a single window dimension
+    const auto extent = [&win](unsigned int dim) {
+        return static_cast<unsigned int>(win[dim].end() - win[dim].start());
+    };
+
+    return {
+        { origin(0), extent(0) },
+        { origin(1), extent(1) },
+        { origin(2), extent(2) },
+        { origin(3), extent(3) },
+        { origin(4), extent(4) },
+        { origin(5), extent(5) }
+    };
+}
+
+} //namespace arm_gemm
diff --git a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
index d17fd5fe97..ea9b524e15 100644
--- a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,7 +23,10 @@
*/
#pragma once
+#include "arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp"
+
#include <cstddef>
+#include <cassert>
#define UNUSED(x) (void)(x)
@@ -51,10 +54,10 @@ public:
void *C, const int ldc, const int C_batch_stride, const int C_multi_stride,
const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) = 0;
- /* For threading, we divide the work into some number of units and work
- * out internally what unit corresponds to what work. This returns the
- * total number of units. */
- virtual unsigned int get_window_size() const = 0;
+ /** @returns an ndrange_t containing ranges of the compute space which can be
+ * broken up and parallelised over
+ */
+ virtual ndrange_t get_window_size() const = 0;
/* The maximum thread count is specified when the GEMM is created. Some
* implementations need to know how many threads will actually run in
@@ -73,9 +76,12 @@ public:
/* Whether this GEMM can be dynamically scheduled or not. */
virtual bool supports_dynamic_scheduling() const { return false; }
- /* Actually do the work. Provide a threadid to index any per-thread
- * buffers, and a start/end range to indicate which work to do. */
- virtual void execute(unsigned int, unsigned int, int) = 0;
+ /** Main execute member function
+ * @param [in] work_range specifies the range of work we want to be computed, total range defined by get_window_size()
+ * @param [in] thread_locator where are we inside of the thread space
+ * @param [in] threadid a unique threadid
+ */
+ virtual void execute(const ndcoord_t& work_range, const ndcoord_t& thread_locator, int threadid) = 0;
/*** Working space interface (optional) ***/
/* Total number of bytes of temporary working space needed. If zero, it's not necessary to call set_working_space(). */
@@ -108,8 +114,7 @@ public:
virtual ~IGemmCommon() { }
};
-/*
- * "Real" GemmCommon class which is templated on the operand and return types.
+/* "Real" GemmCommon class which is templated on the operand and return types.
*
* In addition to correctly typed versions of the functions that operate on
* operand and return data, this class provides a default implementation of
@@ -178,4 +183,19 @@ public:
}
};
+/** Total number of work items described by a kernel's window.
+ *
+ * Multiplies together the size of every dimension (0 .. ndrange_max - 1) of the
+ * object returned by `kernel.get_window_size()`.
+ *
+ * @param[in] kernel object exposing `get_window_size()`
+ * @return the product of all dimension sizes
+ */
+template<typename GemmKernel>
+inline
+int unsigned get_total_window_size(const GemmKernel& kernel)
+{
+    auto ndrange = kernel.get_window_size();
+
+    unsigned int product = 1;
+    unsigned dim = 0;
+    while(dim != arm_gemm::ndrange_max)
+    {
+        product *= ndrange.get_size(dim);
+        ++dim;
+    }
+
+    return product;
+}
+
} // namespace arm_gemm