aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/CL
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2021-04-28 10:20:18 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-05-19 11:38:32 +0000
commit11d8415aa57b69fb6c83e86a37e3026c22d1d37d (patch)
tree8f6bb12011ddc7275a8cc071dbf8ffe90a88e8eb /src/runtime/CL
parent856f66e6c61b77d03f754cd0fa8439891f0e4aca (diff)
downloadComputeLibrary-11d8415aa57b69fb6c83e86a37e3026c22d1d37d.tar.gz
Port DepthConvert to new Api
- Renames DepthConvert to Cast - Ports both NEDepthConverLayer and CLDepthConvert variants - Removes legacy shift capability from DepthConvert, allowing only shifts of 0 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I806a0f8eb23d23502b632c529fda7edde19c8176 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5565 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL')
-rw-r--r--src/runtime/CL/functions/CLCast.cpp41
-rw-r--r--src/runtime/CL/functions/CLDepthConvertLayer.cpp45
-rw-r--r--src/runtime/CL/functions/CLFullyConnectedLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp11
-rw-r--r--src/runtime/CL/functions/CLLSTMLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLLSTMLayerQuantized.cpp1
-rw-r--r--src/runtime/CL/functions/CLQLSTMLayer.cpp1
-rw-r--r--src/runtime/CL/functions/CLRNNLayer.cpp1
10 files changed, 80 insertions, 24 deletions
diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp
index 202140d8b9..53256ebed4 100644
--- a/src/runtime/CL/functions/CLCast.cpp
+++ b/src/runtime/CL/functions/CLCast.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,12 +23,31 @@
*/
#include "arm_compute/runtime/CL/functions/CLCast.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClCast.h"
#include <utility>
namespace arm_compute
{
+struct CLCast::Impl
+{
+ const ICLTensor *src{ nullptr };
+ ICLTensor *dst{ nullptr };
+ std::unique_ptr<opencl::ClCast> op{ nullptr };
+};
+
+CLCast::CLCast()
+ : _impl(std::make_unique<Impl>())
+{
+}
+CLCast::CLCast(CLCast &&) = default;
+CLCast &CLCast::operator=(CLCast &&) = default;
+CLCast::~CLCast() = default;
+
void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, policy);
@@ -36,13 +55,23 @@ void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy
void CLCast::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
{
- auto k = std::make_unique<CLDepthConvertLayerKernel>();
- k->configure(compile_context, input, output, policy, 0);
- _kernel = std::move(k);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ _impl->src = input;
+ _impl->dst = output;
+
+ _impl->op = std::make_unique<opencl::ClCast>();
+ _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), policy);
}
Status CLCast::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy)
{
- return CLDepthConvertLayerKernel::validate(input, output, policy, 0);
+ return opencl::ClCast::validate(input, output, policy);
+}
+
+void CLCast::run()
+{
+ ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+ _impl->op->run(pack);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
index 47bc52364d..6aa370b23c 100644
--- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,12 +23,31 @@
*/
#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClCast.h"
#include <utility>
namespace arm_compute
{
+struct CLDepthConvertLayer::Impl
+{
+ const ICLTensor *src{ nullptr };
+ ICLTensor *dst{ nullptr };
+ std::unique_ptr<opencl::ClCast> op{ nullptr };
+};
+
+CLDepthConvertLayer::CLDepthConvertLayer()
+ : _impl(std::make_unique<Impl>())
+{
+}
+CLDepthConvertLayer::CLDepthConvertLayer(CLDepthConvertLayer &&) = default;
+CLDepthConvertLayer &CLDepthConvertLayer::operator=(CLDepthConvertLayer &&) = default;
+CLDepthConvertLayer::~CLDepthConvertLayer() = default;
+
void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, policy, shift);
@@ -36,13 +55,27 @@ void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, C
void CLDepthConvertLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
{
- auto k = std::make_unique<CLDepthConvertLayerKernel>();
- k->configure(compile_context, input, output, policy, shift);
- _kernel = std::move(k);
+ ARM_COMPUTE_UNUSED(shift);
+
+ _impl->src = input;
+ _impl->dst = output;
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->src, _impl->dst);
+ ARM_COMPUTE_ERROR_ON(shift != 0);
+
+ _impl->op = std::make_unique<opencl::ClCast>();
+ _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), policy);
}
Status CLDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
{
- return CLDepthConvertLayerKernel::validate(input, output, policy, shift);
+ ARM_COMPUTE_RETURN_ERROR_ON(shift != 0);
+ return opencl::ClCast::validate(input, output, policy);
+}
+
+void CLDepthConvertLayer::run()
+{
+ ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+ _impl->op->run(pack);
}
} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 991472bb7a..50a145f9ca 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -28,7 +28,6 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 5dc7556b2f..3184d5dfe0 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -31,7 +31,6 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLCol2ImKernel.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
index 7a01018f59..d5d1b5f41e 100644
--- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
@@ -29,7 +29,6 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 099a2c980f..3be09581bd 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -34,12 +34,12 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/gpu/cl/kernels/ClCastKernel.h"
#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
@@ -189,7 +189,7 @@ inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type)
CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _weights_to_qasymm8(std::make_unique<CLDepthConvertLayerKernel>()),
+ _weights_to_qasymm8(std::make_unique<opencl::kernels::ClCastKernel>()),
_mm_native_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
_mm_reshaped_only_rhs_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
_mtx_b_reshape_kernel(std::make_unique<opencl::kernels::ClGemmReshapeRhsMatrixKernel>()),
@@ -272,7 +272,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
TensorInfo weights_info(*b->info());
weights_info.set_data_type(DataType::QASYMM8);
_qasymm8_weights.allocator()->init(weights_info);
- _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
+ _weights_to_qasymm8->configure(compile_context, b->info(), _qasymm8_weights.info(), ConvertPolicy::WRAP);
}
const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
@@ -480,7 +480,7 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
{
b_offset = -128;
weights_info.set_data_type(DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConvertLayerKernel::validate(b, &weights_info, ConvertPolicy::WRAP, 0));
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCastKernel::validate(b, &weights_info, ConvertPolicy::WRAP));
}
const ITensorInfo *matrix_b_info = &weights_info;
if(reshape_matrix_b)
@@ -681,7 +681,8 @@ void CLGEMMLowpMatrixMultiplyCore::prepare()
if(_convert_to_qasymm8)
{
_qasymm8_weights.allocator()->allocate();
- CLScheduler::get().enqueue(*_weights_to_qasymm8, false);
+ ITensorPack convert_to_qs8_pack = { { ACL_SRC, _original_b }, { ACL_DST, &_qasymm8_weights } };
+ CLScheduler::get().enqueue_op(*_weights_to_qasymm8, convert_to_qs8_pack, false);
}
if(_is_gemm_reshaped && _reshape_b_only_on_first_run)
diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp
index 146ac8f619..85d13c246e 100644
--- a/src/runtime/CL/functions/CLLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayer.cpp
@@ -29,7 +29,6 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index 69974424c9..a44dcd2e24 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -27,7 +27,6 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp
index 7b6ec8f5c8..fcf5b9d2a4 100644
--- a/src/runtime/CL/functions/CLQLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp
@@ -30,7 +30,6 @@
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp
index 45ced35782..755fa40121 100644
--- a/src/runtime/CL/functions/CLRNNLayer.cpp
+++ b/src/runtime/CL/functions/CLRNNLayer.cpp
@@ -28,7 +28,6 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"