From 96b16b65dd96351b8af1b2a785856ce13cc8ba84 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 1 Dec 2020 17:41:34 +0000 Subject: Remove support for (NE/CL)LocallyConnectedLayer Remove out-of-date and unmaintained LocallyConnectedLayer for both NEON and OpenCL. Resolves: COMPMID-3924 Signed-off-by: Georgios Pinitas Change-Id: Ia61398ed8cfa3876f41c1b342c4a80d1cca0ca83 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4634 Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/core/CL/CLKernels.h | 1 - src/core/CL/cl_kernels/gemm.cl | 70 +--------- .../CLLocallyConnectedMatrixMultiplyKernel.cpp | 145 --------------------- .../CLLocallyConnectedMatrixMultiplyKernel.h | 85 ------------ 4 files changed, 1 insertion(+), 300 deletions(-) delete mode 100644 src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp delete mode 100644 src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h (limited to 'src/core/CL') diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h index b335372fa9..eea90eb599 100644 --- a/src/core/CL/CLKernels.h +++ b/src/core/CL/CLKernels.h @@ -103,7 +103,6 @@ #include "src/core/CL/kernels/CLIntegralImageKernel.h" #include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h" #include "src/core/CL/kernels/CLLKTrackerKernel.h" -#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" #include "src/core/CL/kernels/CLMagnitudePhaseKernel.h" #include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h" #include "src/core/CL/kernels/CLMeanStdDevKernel.h" diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl index b6afb85aa4..6883aafee5 100644 --- a/src/core/CL/cl_kernels/gemm.cl +++ b/src/core/CL/cl_kernels/gemm.cl @@ -4379,72 +4379,4 @@ __kernel void gemm_ma_f16(TENSOR3D_DECLARATION(src), vstore8(out, 0, (__global half *)dst.ptr); } #endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) -#endif // defined(BETA) - -#if defined(WIDTH_VECTOR_A) -/** This OpenCL kernel computes the vector by matrix multiplication between each row of A (src0) and matrix B (src1) used for locally connected layer - * - * @note The width of A need to be passed at compile time using -DWIDTH_VECTOR_A - * - * @note The input A and matrix B must not be reshaped - * - * @param[in] src0_ptr Pointer to the source matrix. Supported data types: F32 - * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes) - * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes) - * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix - * @param[in] src1_ptr Pointer to the source matrix. Supported data types: same as @p src0_ptr - * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes) - * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes) - * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in bytes) - * @param[in] src1_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix - * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr - * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) - * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) - * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix - */ -__kernel void gemm_lc_vm_f32(IMAGE_DECLARATION(src0), - TENSOR3D_DECLARATION(src1), - IMAGE_DECLARATION(dst)) -{ - int idx = get_global_id(0) * 4; - int idy = get_global_id(1); - - // Compute the address for the vector A and matrix B - int2 src_addr = ((int2)(src0_offset_first_element_in_bytes + src0_stride_y * idy, src1_offset_first_element_in_bytes + src1_stride_z * idy)); - src_addr.s1 += idx * sizeof(float); - - int end_row_vec_a = src_addr.s0 + (WIDTH_VECTOR_A * sizeof(float)); - - float4 acc = 0.0f; - - for(; src_addr.s0 <= (end_row_vec_a - 2 * (int)sizeof(float)); src_addr += (int2)(2 * sizeof(float), 2 * src1_stride_y)) - { - float2 a0 = vload2(0, (__global float *)(src0_ptr + src_addr.s0)); - float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1)); - float4 b1 = vload4(0, (__global float *)(src1_ptr + src_addr.s1 + src1_stride_y)); - - acc += b0 * (float4)a0.s0; - acc += b1 * (float4)a0.s1; - } - - for(; src_addr.s0 < end_row_vec_a; src_addr += (int2)(sizeof(float), src1_stride_y)) - { - float a0 = *((__global float *)(src0_ptr + src_addr.s0)); - float4 b0 = vload4(0, (__global float *)(src1_ptr + src_addr.s1)); - - acc += b0 * (float4)a0; - } - - // Compute destination address - Image dst = CONVERT_TO_IMAGE_STRUCT(dst); - - vstore4(acc, 0, (__global float *)(offset(&dst, 0, 0))); -} -#endif // defined(WIDTH_VECTOR_A) +#endif // defined(BETA) \ No newline at end of file diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp deleted file mode 100644 index 49e04c32c2..0000000000 --- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" - -namespace arm_compute -{ -CLLocallyConnectedMatrixMultiplyKernel::CLLocallyConnectedMatrixMultiplyKernel() - : _input0(nullptr), _input1(nullptr), _output(nullptr) -{ -} - -namespace -{ -Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input0); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output); - ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1)); - - return Status{}; -} - -std::tuple validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output) -{ - const unsigned int num_elems_processed_per_iteration_x = max_cl_vector_width / data_size_from_type(input0->data_type()); - - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x)); - - AccessWindowHorizontal input0_access(input0, 0, num_elems_processed_per_iteration_x); - AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration_x); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration_x); - - bool window_changed = update_window_and_padding(win, input0_access, input1_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); - - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - - return std::make_tuple(err, win); -} -} // namespace - -void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output); -} - -void CLLocallyConnectedMatrixMultiplyKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info())); - - _input0 = input0; - _input1 = input1; - _output = output; - - cl::NDRange lws_hint; - if(output->info()->dimension(1) == 196) - { - lws_hint = cl::NDRange(1, 7); - } - else - { - lws_hint = cl::NDRange(8, 8); - } - - std::ostringstream mm_arguments; - std::set build_opts; - - mm_arguments << "-DWIDTH_VECTOR_A=" << input0->info()->dimension(0) << " "; - build_opts.emplace(mm_arguments.str()); - - // Create kernel - std::string data_type_name = lower_string(string_from_data_type(input0->info()->data_type())); - _kernel = create_kernel(compile_context, ("gemm_lc_vm_" + data_type_name), build_opts); - - // Configure kernel window - auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info()); - - ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - - ICLKernel::configure_internal(std::get<1>(win_config), lws_hint); -} - -Status CLLocallyConnectedMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output)); - ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input0->clone().get(), input1->clone().get(), output->clone().get()))); - - return Status{}; -} - -void CLLocallyConnectedMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - - Window matrix_b_window; - matrix_b_window.use_tensor_dimensions(_input1->info()->tensor_shape()); - Window slice_matrix_b = matrix_b_window.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input0, slice); - add_3D_tensor_argument(idx, _input1, slice_matrix_b); - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h deleted file mode 100644 index 5d0a22afa5..0000000000 --- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor. - * - * @attention The second input tensor must have at least 2 dimensions (matrix) - * - */ -class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLocallyConnectedMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input, output and alpha - * - * @param[in] input0 First input tensor. Data types supported: F32 - * @param[in] input1 Second input tensor. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Initialise the kernel's input, output and alpha - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 First input tensor. Data types supported: F32 - * @param[in] input1 Second input tensor. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result. Data type supported: same as @p input0 - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel - * - * @param[in] input0 First input tensor info. Data types supported: F32 - * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 - * @param[in] output Output tensor info. Data type supported: same as @p input0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input0; - const ICLTensor *_input1; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ -- cgit v1.2.1