From 11d8415aa57b69fb6c83e86a37e3026c22d1d37d Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 28 Apr 2021 10:20:18 +0100 Subject: Port DepthConvert to new Api - Renames DepthConvert to Cast - Ports both NEDepthConverLayer and CLDepthConvert variants - Removes legacy shift capability from DepthConvert, allowing only shifts of 0 Signed-off-by: Georgios Pinitas Change-Id: I806a0f8eb23d23502b632c529fda7edde19c8176 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5565 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/runtime/CL/functions/CLCast.cpp | 41 +++++++++++++++++--- src/runtime/CL/functions/CLDepthConvertLayer.cpp | 45 +++++++++++++++++++--- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 1 - .../CL/functions/CLGEMMConvolutionLayer.cpp | 1 - .../CL/functions/CLGEMMDeconvolutionLayer.cpp | 1 - .../CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 11 +++--- src/runtime/CL/functions/CLLSTMLayer.cpp | 1 - src/runtime/CL/functions/CLLSTMLayerQuantized.cpp | 1 - src/runtime/CL/functions/CLQLSTMLayer.cpp | 1 - src/runtime/CL/functions/CLRNNLayer.cpp | 1 - 10 files changed, 80 insertions(+), 24 deletions(-) (limited to 'src/runtime/CL/functions') diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp index 202140d8b9..53256ebed4 100644 --- a/src/runtime/CL/functions/CLCast.cpp +++ b/src/runtime/CL/functions/CLCast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,12 +23,31 @@ */ #include "arm_compute/runtime/CL/functions/CLCast.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Validate.h" +#include "src/core/CL/ICLKernel.h" +#include "src/runtime/gpu/cl/operators/ClCast.h" #include namespace arm_compute { +struct CLCast::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLCast::CLCast() + : _impl(std::make_unique()) +{ +} +CLCast::CLCast(CLCast &&) = default; +CLCast &CLCast::operator=(CLCast &&) = default; +CLCast::~CLCast() = default; + void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy) { configure(CLKernelLibrary::get().get_compile_context(), input, output, policy); @@ -36,13 +55,23 @@ void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy void CLCast::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy) { - auto k = std::make_unique(); - k->configure(compile_context, input, output, policy, 0); - _kernel = std::move(k); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + + _impl->src = input; + _impl->dst = output; + + _impl->op = std::make_unique(); + _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), policy); } Status CLCast::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy) { - return CLDepthConvertLayerKernel::validate(input, output, policy, 0); + return opencl::ClCast::validate(input, output, policy); +} + +void CLCast::run() +{ + ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } }; + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp index 47bc52364d..6aa370b23c 100644 --- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp +++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,12 +23,31 @@ */ #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Validate.h" +#include "src/core/CL/ICLKernel.h" +#include "src/runtime/gpu/cl/operators/ClCast.h" #include namespace arm_compute { +struct CLDepthConvertLayer::Impl +{ + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr op{ nullptr }; +}; + +CLDepthConvertLayer::CLDepthConvertLayer() + : _impl(std::make_unique()) +{ +} +CLDepthConvertLayer::CLDepthConvertLayer(CLDepthConvertLayer &&) = default; +CLDepthConvertLayer &CLDepthConvertLayer::operator=(CLDepthConvertLayer &&) = default; +CLDepthConvertLayer::~CLDepthConvertLayer() = default; + void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) { configure(CLKernelLibrary::get().get_compile_context(), input, output, policy, shift); @@ -36,13 +55,27 @@ void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, C void CLDepthConvertLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) { - auto k = std::make_unique(); - k->configure(compile_context, input, output, policy, shift); - _kernel = std::move(k); + ARM_COMPUTE_UNUSED(shift); + + _impl->src = input; + _impl->dst = output; + + ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->src, _impl->dst); + ARM_COMPUTE_ERROR_ON(shift != 0); + + _impl->op = std::make_unique(); + _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), policy); } Status CLDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift) { - return CLDepthConvertLayerKernel::validate(input, output, policy, shift); + ARM_COMPUTE_RETURN_ERROR_ON(shift != 0); + return opencl::ClCast::validate(input, output, policy); +} + +void CLDepthConvertLayer::run() +{ + ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } }; + _impl->op->run(pack); } } // namespace arm_compute diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 991472bb7a..50a145f9ca 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -28,7 +28,6 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 5dc7556b2f..3184d5dfe0 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -31,7 +31,6 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "src/core/CL/kernels/CLCol2ImKernel.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" #include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp index 7a01018f59..d5d1b5f41e 100644 --- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp @@ -29,7 +29,6 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp index 099a2c980f..3be09581bd 100644 --- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp @@ -34,12 +34,12 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" #include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" #include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" #include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h" +#include "src/core/gpu/cl/kernels/ClCastKernel.h" #include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h" @@ -189,7 +189,7 @@ inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type) CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), - _weights_to_qasymm8(std::make_unique()), + _weights_to_qasymm8(std::make_unique()), _mm_native_kernel(std::make_unique()), _mm_reshaped_only_rhs_kernel(std::make_unique()), _mtx_b_reshape_kernel(std::make_unique()), @@ -272,7 +272,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con TensorInfo weights_info(*b->info()); weights_info.set_data_type(DataType::QASYMM8); _qasymm8_weights.allocator()->init(weights_info); - _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0); + _weights_to_qasymm8->configure(compile_context, b->info(), _qasymm8_weights.info(), ConvertPolicy::WRAP); } const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b; @@ -480,7 +480,7 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso { b_offset = -128; weights_info.set_data_type(DataType::QASYMM8); - ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConvertLayerKernel::validate(b, &weights_info, ConvertPolicy::WRAP, 0)); + ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCastKernel::validate(b, &weights_info, ConvertPolicy::WRAP)); } const ITensorInfo *matrix_b_info = &weights_info; if(reshape_matrix_b) @@ -681,7 +681,8 @@ void CLGEMMLowpMatrixMultiplyCore::prepare() if(_convert_to_qasymm8) { _qasymm8_weights.allocator()->allocate(); - CLScheduler::get().enqueue(*_weights_to_qasymm8, false); + ITensorPack convert_to_qs8_pack = { { ACL_SRC, _original_b }, { ACL_DST, &_qasymm8_weights } }; + CLScheduler::get().enqueue_op(*_weights_to_qasymm8, convert_to_qs8_pack, false); } if(_is_gemm_reshaped && _reshape_b_only_on_first_run) diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp index 146ac8f619..85d13c246e 100644 --- a/src/runtime/CL/functions/CLLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLLSTMLayer.cpp @@ -29,7 +29,6 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp index 69974424c9..a44dcd2e24 100644 --- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp +++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp @@ -27,7 +27,6 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp index 7b6ec8f5c8..fcf5b9d2a4 100644 --- a/src/runtime/CL/functions/CLQLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp @@ -30,7 +30,6 @@ #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp index 45ced35782..755fa40121 100644 --- a/src/runtime/CL/functions/CLRNNLayer.cpp +++ b/src/runtime/CL/functions/CLRNNLayer.cpp @@ -28,7 +28,6 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" -- cgit v1.2.1