From d308df3186b4f6057f94b45b7bed7935c618ea80 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 1 Dec 2020 16:56:36 +0000 Subject: Remove unused CLGEMMMatrixVectorMultiplyKernel Partially Resolves: COMPMID-3924 Signed-off-by: Georgios Pinitas Change-Id: Ibc47bd5bf5203dbad8d0755608918fcb384053c3 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4633 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- Android.bp | 1 - docs/00_introduction.dox | 2 + src/core/CL/CLKernels.h | 1 - .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 178 --------------------- .../CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h | 84 ---------- src/runtime/CL/tuners/BifrostTuner.cpp | 22 --- 6 files changed, 2 insertions(+), 286 deletions(-) delete mode 100644 src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp delete mode 100644 src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h diff --git a/Android.bp b/Android.bp index a4ca5c79e8..e9b3588a83 100644 --- a/Android.bp +++ b/Android.bp @@ -139,7 +139,6 @@ cc_library_static { "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp", "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp", "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp", - "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp", "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp", "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp", "src/core/CL/kernels/CLGatherKernel.cpp", diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 7fe73c42f0..bdfbdb89b9 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -88,6 +88,8 @@ If there is more than one release in a month then an extra sequential number is v21.02 Public major release - Upgraded C++ standard to C++14 + - Removed kernels: + - CLGEMMMatrixVectorMultiplyKernel v20.11 Public major release - Various bug fixes. 
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h index 282cc96dd8..b335372fa9 100644 --- a/src/core/CL/CLKernels.h +++ b/src/core/CL/CLKernels.h @@ -86,7 +86,6 @@ #include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" #include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" -#include "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" #include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "src/core/CL/kernels/CLGatherKernel.h" diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp deleted file mode 100644 index ee0abc56d3..0000000000 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -constexpr unsigned int num_elems_read_per_iteration = 4; -constexpr unsigned int num_rows_read_per_iteration = 4; - -Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1); - ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input0->data_type()) && (output->data_type() != DataType::S32)); - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(2) != input1->dimension(1)); - - return Status{}; -} - -std::pair validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output) -{ - const unsigned int border_x = ceil_to_multiple(input0->dimension(0), num_elems_read_per_iteration) - input0->dimension(0); - const unsigned int border_y = ceil_to_multiple(input0->dimension(1), num_rows_read_per_iteration) - input0->dimension(1); - - Window win = calculate_max_window(*input0, Steps(num_elems_read_per_iteration)); - - AccessWindowRectangle input0_access(input0, 0, 0, 
num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal input1_access(input1, 0, num_elems_read_per_iteration); - AccessWindowStatic output_access(output, 0, 0, output->dimension(0) + border_x, output->dimension(1) + border_y); - - bool window_changed = update_window_and_padding(win, input0_access, input1_access, output_access); - - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLGEMMMatrixVectorMultiplyKernel::CLGEMMMatrixVectorMultiplyKernel() - : _input0(nullptr), _input1(nullptr), _output(nullptr), _num_rows_read_per_iteration(0), _border_size(0) -{ -} -BorderSize CLGEMMMatrixVectorMultiplyKernel::border_size() const -{ - return _border_size; -} - -void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output); -} - -void CLGEMMMatrixVectorMultiplyKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info())); - - _input0 = input0; - _input1 = input1; - _output = output; - - // Check if is a quantized operation - const bool is_quantized = is_data_type_quantized_asymmetric(_input0->info()->data_type()); - - // Create kernel - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input0->info()->data_type())); - build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input0->info()->dimension(0))); - build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input0->info()->dimension(1))); - - std::string 
kernel_name = is_quantized ? std::string("gemm_mv_quantized") : std::string("gemm_mv"); - _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); - - // Add static arguments - if(is_quantized) - { - const UniformQuantizationInfo iq0_info = _input0->info()->quantization_info().uniform(); - const UniformQuantizationInfo iq1_info = _input1->info()->quantization_info().uniform(); - - unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor() + num_arguments_per_1D_tensor(); - _kernel.setArg(idx++, -iq0_info.offset); - _kernel.setArg(idx++, -iq1_info.offset); - } - - // Configure kernel window - _num_rows_read_per_iteration = num_rows_read_per_iteration; - - const unsigned int border_x = ceil_to_multiple(input0->info()->dimension(0), num_elems_read_per_iteration) - input0->info()->dimension(0); - const unsigned int border_y = ceil_to_multiple(input0->info()->dimension(1), _num_rows_read_per_iteration) - input0->info()->dimension(1); - - _border_size = BorderSize(border_y, border_x); - - auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); -} - -Status CLGEMMMatrixVectorMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input0->clone().get(), input1->clone().get(), output->clone().get()).first); - - return Status{}; -} - -void CLGEMMMatrixVectorMultiplyKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window slice_in = window.first_slice_window_3D(); - Window slice_in2 = window.first_slice_window_3D(); - Window slice_out = window.first_slice_window_3D(); - - // 
Setup input0 slice - slice_in.set(Window::DimX, Window::Dimension(0, _input0->info()->dimension(0), _input0->info()->dimension(0))); - slice_in.set(Window::DimY, Window::Dimension(0, _input0->info()->dimension(1) + border_size().bottom, _num_rows_read_per_iteration)); - slice_in.set(Window::DimZ, Window::Dimension(0, _input0->info()->dimension(2), 1)); - - // Setup input1 and output slice. Their dimensions are increased in the cl kernel. - slice_in2.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in2.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in2.set(Window::DimZ, Window::Dimension(0, 0, 0)); - - slice_out.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0)); - - unsigned int idx_1 = num_arguments_per_3D_tensor(); - - add_2D_tensor_argument(idx_1, _input1, slice_in2); - - do - { - unsigned int idx_0 = 0; - unsigned int idx_2 = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor(); - add_3D_tensor_argument(idx_0, _input0, slice_in); - add_1D_tensor_argument(idx_2, _output, slice_out); - enqueue(queue, *this, slice_in, lws_hint()); - } - while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h deleted file mode 100644 index bef8c231ac..0000000000 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the GEMM matrix vector multiply kernel. 
**/ -class CLGEMMMatrixVectorMultiplyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMMatrixVectorMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixVectorMultiplyKernel(const CLGEMMMatrixVectorMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixVectorMultiplyKernel &operator=(const CLGEMMMatrixVectorMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixVectorMultiplyKernel(CLGEMMMatrixVectorMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. - * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. - * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel - * - * @param[in] input0 The reshaped input tensor info. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input. - * @param[in] output The output 2D tensor info. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED. - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; - int _num_rows_read_per_iteration; - BorderSize _border_size; -}; -} // arm_compute -#endif /*ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H */ diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp index 9490e0b219..8badd57b9e 100644 --- a/src/runtime/CL/tuners/BifrostTuner.cpp +++ b/src/runtime/CL/tuners/BifrostTuner.cpp @@ -171,24 +171,6 @@ void tune_im2col_kernel(CLIm2ColKernel &k) k.set_lws_hint(lws_hint); } -void tune_gemv_kernel(CLGEMMMatrixVectorMultiplyKernel &k) -{ - cl::NDRange lws_hint = k.lws_hint(); - const GPUTarget gpu_target = k.get_target(); - - // Configure the local work size for Bifrost with a value obtained - // via exhaustive autotuning for the MobileNets tensor shapes. 
- if(gpu_target_is_in(gpu_target, - GPUTarget::G71, GPUTarget::G72, GPUTarget::G76, - GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, - GPUTarget::G52, GPUTarget::G52LIT)) - { - lws_hint = cl::NDRange(1, 1, 1); - } - - k.set_lws_hint(lws_hint); -} - void tune_gemm_kernel(CLGEMMMatrixMultiplyKernel &k) { cl::NDRange lws_hint = k.lws_hint(); @@ -293,10 +275,6 @@ void BifrostTuner::tune_kernel_static(ICLKernel &kernel) { tune_im2col_kernel(*utils::cast::polymorphic_downcast(&kernel)); } - else if(dynamic_cast(&kernel) != nullptr) - { - tune_gemv_kernel(*utils::cast::polymorphic_downcast(&kernel)); - } else if(dynamic_cast(&kernel) != nullptr) { tune_gemm_kernel(*utils::cast::polymorphic_downcast(&kernel)); -- cgit v1.2.1