diff options
Diffstat (limited to 'arm_compute/core')
5 files changed, 200 insertions, 21 deletions
diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index f41567ee11..ec05af20bd 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -49,7 +49,25 @@ public: * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) * @param[in] info Info about executing thread and CPU. */ - virtual void run(const Window &window, const ThreadInfo &info) = 0; + virtual void run(const Window &window, const ThreadInfo &info) + { + ARM_COMPUTE_UNUSED(window); + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR("default implementation of legacy run() virtual member function invoked"); + } + + /** legacy compatibility layer for implementations which do not support thread_locator + * In these cases we simply narrow the interface down to the legacy version + * + * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) + * @param[in] info Info about executing thread and CPU. + * @param[in] thread_locator Specifies "where" the current thread is in the multi-dimensional space + */ + virtual void run_nd(const Window &window, const ThreadInfo &info, const Window &thread_locator) + { + ARM_COMPUTE_UNUSED(thread_locator); + run(window, info); + } /** Name of the kernel * diff --git a/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h b/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h index d612681c41..0e3dd74577 100644 --- a/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h +++ b/arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited.
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H #define ARM_COMPUTE_ASSEMBLY_GEMM_KERNEL_WRAPPER_KERNEL_H +#include "arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" @@ -65,15 +66,33 @@ public: { return _name.c_str(); } - // Inherited methods overridden: + + void run(const Window &window, const ThreadInfo &info) override { ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel))); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - auto first = window.x().start(); - auto last = window.x().end(); - _kernel->execute(first, last, info.thread_id); + + auto win=arm_gemm::to_ndcoord(window); + + arm_gemm::ndcoord_t thread_locator { }; + + _kernel->execute(win, thread_locator, info.thread_id); } + + // Inherited methods overridden: + void run_nd(const Window &window, const ThreadInfo &info, const Window &thread_locator) override + { + ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(_kernel))); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + + //convert between arm_compute and arm_gemm types + auto ndc_win = arm_gemm::to_ndcoord(window); + auto ndc_tlc = arm_gemm::to_ndcoord(thread_locator); + + _kernel->execute(ndc_win, ndc_tlc, info.thread_id); + } + /** Initialise the kernel's input and output. * * @param[in] kernel Pointer to an assembly kernel implementation. 
@@ -83,9 +102,9 @@ public: { ARM_COMPUTE_ERROR_ON_NULLPTR((reinterpret_cast<void *>(kernel))); _kernel = kernel; - auto win_last = _kernel->get_window_size(); - Window win; - win.set(Window::DimX, Window::Dimension(0, win_last, 1)); + + Window win = to_window(kernel->get_window_size()); + INEKernel::configure(win); if(!kernel_name_tag.empty()) diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp index e89523981d..7723224ec8 100644 --- a/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp +++ b/arm_compute/core/NEON/kernels/assembly/arm_gemm.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,7 @@ enum class GemmMethod GEMM_NATIVE, GEMM_HYBRID, GEMM_INTERLEAVED, + GEMM_INTERLEAVED_2D, QUANTIZE_WRAPPER, GEMM_HYBRID_QUANTIZED }; diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp new file mode 100644 index 0000000000..7dff01003d --- /dev/null +++ b/arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2020 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma once + +#include "arm_compute/core/Window.h" +#include "arm_compute/core/Dimensions.h" +#include "src/core/NEON/kernels/arm_gemm/ndrange.hpp" + +#include <cassert> + +/* This file contains mapping between integral types used in arm_compute and arm_gemm + * These two codebases both require a degree of separation for the sake of modularity + * so maintain their own types which represent similar information. + */ + +namespace arm_gemm { + +//we want to unify the maximum number of dimensions used between arm_gemm and arm compute library +constexpr std::size_t ndrange_max = + arm_compute::Dimensions<unsigned int>::num_max_dimensions; + +using ndrange_t=NDRange<ndrange_max>; +using ndcoord_t=NDCoordinate<ndrange_max>; + +/* Converts an `arm_gemm::ndrange_t` to a `arm_compute::Window` + * + * As `NDRange<T>` does not encode start positions, we specify + * the start to be zero in the produced `arm_compute::Window` + * + * @param [ndr] the `arm_gemm::ndrange_t` we wish to convert into a `arm_compute::Window` + * @returns an `arm_compute::Window` representing the same dimensional ranges as `ndr` + */ +inline arm_compute::Window to_window(const ndrange_t& ndr) { + arm_compute::Window win; + + for(unsigned int i = 0; i!=ndrange_max; ++i) { + //populate the window with the dimensions of the NDRange + win.set(i, arm_compute::Window::Dimension(0, ndr.get_size(i))); + } + + return win; +} + +/* + * Converts an `arm_gemm::ndcoord_t` to a `arm_compute::Window` + * + * @param [ndc]
the `arm_gemm::ndcoord_t` we wish to convert into a `arm_compute::Window` + * @returns an `arm_compute::Window` representing the same dimensional ranges as `ndc` + */ +inline arm_compute::Window to_window(const ndcoord_t& ndc) { + arm_compute::Window win; + + for(unsigned int i = 0; i!=ndrange_max; ++i) { + const auto start = ndc.get_position(i); + const auto size = ndc.get_size(i); + const auto stop = start + size; + + //populate the window with the dimensions of the NDRange + win.set(i, arm_compute::Window::Dimension(start, stop)); + } + + return win; +} + +/** Convert an `arm_compute::Window` to an `arm_gemm::NDRange` of the same max dimensions + * + * It should be noted that `arm_compute::Window` specifies a `start()` and an `end()` + * whereas `arm_gemm::ndrange_t` only has a size, as a result we store the delta between `end()` and `start()` + * + * @param [win] the `arm_compute::Window` we want to convert to `arm_gemm::ndrange_t` + * @return the resultant ndrange_t + */ +inline ndrange_t to_ndrange(const arm_compute::Window& win) { + return { + static_cast<unsigned int>(win[0].end() - win[0].start()), + static_cast<unsigned int>(win[1].end() - win[1].start()), + static_cast<unsigned int>(win[2].end() - win[2].start()), + static_cast<unsigned int>(win[3].end() - win[3].start()), + static_cast<unsigned int>(win[4].end() - win[4].start()), + static_cast<unsigned int>(win[5].end() - win[5].start()) + }; +} + +/** Convert an `arm_compute::Window` to an `arm_gemm::NDCoord` of the same max dimensions + * + * @param [win] the `arm_compute::Window` we want to convert to `arm_gemm::ndcoord_t` + * @return the resultant ndcoord_t + */ +inline ndcoord_t to_ndcoord(const arm_compute::Window& win) { + return { + { static_cast<unsigned int>(win[0].start()), static_cast<unsigned int>(win[0].end() - win[0].start()) }, + { static_cast<unsigned int>(win[1].start()), static_cast<unsigned int>(win[1].end() - win[1].start()) }, + { static_cast<unsigned int>(win[2].start()),
static_cast<unsigned int>(win[2].end() - win[2].start()) }, + { static_cast<unsigned int>(win[3].start()), static_cast<unsigned int>(win[3].end() - win[3].start()) }, + { static_cast<unsigned int>(win[4].start()), static_cast<unsigned int>(win[4].end() - win[4].start()) }, + { static_cast<unsigned int>(win[5].start()), static_cast<unsigned int>(win[5].end() - win[5].start()) } + }; +} + +} //namespace arm_gemm diff --git a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp index d17fd5fe97..ea9b524e15 100644 --- a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp +++ b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,7 +23,10 @@ */ #pragma once +#include "arm_compute/core/NEON/kernels/assembly/arm_gemm_compute_iface.hpp" + #include <cstddef> +#include <cassert> #define UNUSED(x) (void)(x) @@ -51,10 +54,10 @@ public: void *C, const int ldc, const int C_batch_stride, const int C_multi_stride, const void *bias, /* no row or batch stride needed */ const int bias_multi_stride) = 0; - /* For threading, we divide the work into some number of units and work - * out internally what unit corresponds to what work. This returns the - * total number of units. */ - virtual unsigned int get_window_size() const = 0; + /** @returns an ndrange containing ranges of the compute space which can be + * broken up and parallelised over + */ + virtual ndrange_t get_window_size() const = 0; /* The maximum thread count is specified when the GEMM is created. Some * implementations need to know how many threads will actually run in @@ -73,9 +76,12 @@ public: /* Whether this GEMM can be dynamically scheduled or not. */ virtual bool supports_dynamic_scheduling() const { return false; } - /* Actually do the work. 
Provide a threadid to index any per-thread - * buffers, and a start/end range to indicate which work to do. */ - virtual void execute(unsigned int, unsigned int, int) = 0; + /** Main execute member function + * @param [in] work_range specifies the range of work we want to be computed, total range defined by get_window_size() + * @param [in] thread_locator where are we inside of the thread space + * @param [in] threadid a unique threadid + */ + virtual void execute(const ndcoord_t& work_range, const ndcoord_t& thread_locator, int threadid) = 0; /*** Working space interface (optional) ***/ /* Total number of bytes of temporary working space needed. If zero, it's not necessary to call set_working_space(). */ @@ -108,8 +114,7 @@ public: virtual ~IGemmCommon() { } }; -/* - * "Real" GemmCommon class which is templated on the operand and return types. +/* "Real" GemmCommon class which is templated on the operand and return types. * * In addition to correctly typed versions of the functions that operate on * operand and return data, this class provides a default implementation of @@ -178,4 +183,19 @@ public: } }; +template<typename GemmKernel> +inline +int unsigned get_total_window_size(const GemmKernel& kernel) +{ + auto window=kernel.get_window_size(); + + unsigned int total = 1; + for(unsigned i = 0; i != arm_gemm::ndrange_max; ++i) + { + total *= window.get_size(i); + } + + return total; +} + } // namespace arm_gemm |