author    Georgios Pinitas <georgios.pinitas@arm.com>	2021-04-28 10:20:18 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>	2021-05-19 11:38:32 +0000
commit    11d8415aa57b69fb6c83e86a37e3026c22d1d37d (patch)
tree      8f6bb12011ddc7275a8cc071dbf8ffe90a88e8eb /src/runtime
parent    856f66e6c61b77d03f754cd0fa8439891f0e4aca (diff)
download  ComputeLibrary-11d8415aa57b69fb6c83e86a37e3026c22d1d37d.tar.gz
Port DepthConvert to new Api

- Renames DepthConvert to Cast
- Ports both NEDepthConvertLayer and CLDepthConvertLayer variants
- Removes legacy shift capability from DepthConvert, allowing only shifts of 0

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I806a0f8eb23d23502b632c529fda7edde19c8176
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5565
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
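With the shift parameter gone, Cast is the single entry point for plain data-type conversion. A minimal sketch of the ported NECast interface follows; the tensor shapes and data types are illustrative assumptions, not part of this patch:

    #include "arm_compute/runtime/NEON/functions/NECast.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative shapes/types only; any pairing from the supported table works.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

        NECast cast;
        cast.configure(&src, &dst, ConvertPolicy::SATURATE); // note: no shift argument

        src.allocator()->allocate();
        dst.allocator()->allocate();
        cast.run();
        return 0;
    }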
Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/CL/functions/CLCast.cpp                         41
-rw-r--r--  src/runtime/CL/functions/CLDepthConvertLayer.cpp            45
-rw-r--r--  src/runtime/CL/functions/CLFullyConnectedLayer.cpp           1
-rw-r--r--  src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp          1
-rw-r--r--  src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp        1
-rw-r--r--  src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp   11
-rw-r--r--  src/runtime/CL/functions/CLLSTMLayer.cpp                      1
-rw-r--r--  src/runtime/CL/functions/CLLSTMLayerQuantized.cpp             1
-rw-r--r--  src/runtime/CL/functions/CLQLSTMLayer.cpp                     1
-rw-r--r--  src/runtime/CL/functions/CLRNNLayer.cpp                       1
-rw-r--r--  src/runtime/NEON/functions/NECast.cpp                        42
-rw-r--r--  src/runtime/NEON/functions/NEDepthConvertLayer.cpp           45
-rw-r--r--  src/runtime/cpu/operators/CpuCast.cpp                        44
-rw-r--r--  src/runtime/cpu/operators/CpuCast.h                          73
-rw-r--r--  src/runtime/gpu/cl/operators/ClCast.cpp                      45
-rw-r--r--  src/runtime/gpu/cl/operators/ClCast.h                        74
16 files changed, 386 insertions, 41 deletions
diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp
index 202140d8b9..53256ebed4 100644
--- a/src/runtime/CL/functions/CLCast.cpp
+++ b/src/runtime/CL/functions/CLCast.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,12 +23,31 @@
*/
#include "arm_compute/runtime/CL/functions/CLCast.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClCast.h"
#include <utility>
namespace arm_compute
{
+struct CLCast::Impl
+{
+ const ICLTensor *src{ nullptr };
+ ICLTensor *dst{ nullptr };
+ std::unique_ptr<opencl::ClCast> op{ nullptr };
+};
+
+CLCast::CLCast()
+ : _impl(std::make_unique<Impl>())
+{
+}
+CLCast::CLCast(CLCast &&) = default;
+CLCast &CLCast::operator=(CLCast &&) = default;
+CLCast::~CLCast() = default;
+
void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, policy);
@@ -36,13 +55,23 @@ void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy
void CLCast::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
{
- auto k = std::make_unique<CLDepthConvertLayerKernel>();
- k->configure(compile_context, input, output, policy, 0);
- _kernel = std::move(k);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ _impl->src = input;
+ _impl->dst = output;
+
+ _impl->op = std::make_unique<opencl::ClCast>();
+ _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), policy);
}
Status CLCast::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy)
{
- return CLDepthConvertLayerKernel::validate(input, output, policy, 0);
+ return opencl::ClCast::validate(input, output, policy);
+}
+
+void CLCast::run()
+{
+ ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+ _impl->op->run(pack);
}
} // namespace arm_compute
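The function now follows the PImpl pattern: configure() records the tensor pointers and builds an opencl::ClCast operator, and run() packages them into an ITensorPack. The public interface is unchanged, so existing callers compile as before. A usage sketch under assumed shapes and types:

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLCast.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init(); // context/queue must exist before configure()

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F16));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::F32));

        CLCast cast;
        cast.configure(&src, &dst, ConvertPolicy::SATURATE);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        cast.run(); // internally: ITensorPack{ACL_SRC, ACL_DST} -> opencl::ClCast::run()
        CLScheduler::get().sync();
        return 0;
    }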
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
index 47bc52364d..6aa370b23c 100644
--- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,12 +23,31 @@
*/
#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClCast.h"
#include <utility>
namespace arm_compute
{
+struct CLDepthConvertLayer::Impl
+{
+ const ICLTensor *src{ nullptr };
+ ICLTensor *dst{ nullptr };
+ std::unique_ptr<opencl::ClCast> op{ nullptr };
+};
+
+CLDepthConvertLayer::CLDepthConvertLayer()
+ : _impl(std::make_unique<Impl>())
+{
+}
+CLDepthConvertLayer::CLDepthConvertLayer(CLDepthConvertLayer &&) = default;
+CLDepthConvertLayer &CLDepthConvertLayer::operator=(CLDepthConvertLayer &&) = default;
+CLDepthConvertLayer::~CLDepthConvertLayer() = default;
+
void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, policy, shift);
@@ -36,13 +55,27 @@ void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, C
void CLDepthConvertLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
{
- auto k = std::make_unique<CLDepthConvertLayerKernel>();
- k->configure(compile_context, input, output, policy, shift);
- _kernel = std::move(k);
+ ARM_COMPUTE_UNUSED(shift);
+
+ _impl->src = input;
+ _impl->dst = output;
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->src, _impl->dst);
+ ARM_COMPUTE_ERROR_ON(shift != 0);
+
+ _impl->op = std::make_unique<opencl::ClCast>();
+ _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), policy);
}
Status CLDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
{
- return CLDepthConvertLayerKernel::validate(input, output, policy, shift);
+ ARM_COMPUTE_RETURN_ERROR_ON(shift != 0);
+ return opencl::ClCast::validate(input, output, policy);
+}
+
+void CLDepthConvertLayer::run()
+{
+ ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+ _impl->op->run(pack);
}
} // namespace arm_compute
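Both configure() and validate() now reject any non-zero shift, so callers that relied on the legacy shifting behaviour fail fast instead of silently converting. A validation sketch, with the tensor infos assumed for illustration:

    TensorInfo src_info(TensorShape(4U, 4U), 1, DataType::U8);
    TensorInfo dst_info(TensorShape(4U, 4U), 1, DataType::U16);

    // Zero shift: forwarded to opencl::ClCast::validate() as before.
    Status ok  = CLDepthConvertLayer::validate(&src_info, &dst_info, ConvertPolicy::WRAP, 0);
    // Non-zero shift: rejected up front by ARM_COMPUTE_RETURN_ERROR_ON(shift != 0).
    Status err = CLDepthConvertLayer::validate(&src_info, &dst_info, ConvertPolicy::WRAP, 2);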
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 991472bb7a..50a145f9ca 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -28,7 +28,6 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 5dc7556b2f..3184d5dfe0 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -31,7 +31,6 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLCol2ImKernel.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
index 7a01018f59..d5d1b5f41e 100644
--- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
@@ -29,7 +29,6 @@
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 099a2c980f..3be09581bd 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -34,12 +34,12 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/gpu/cl/kernels/ClCastKernel.h"
#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
@@ -189,7 +189,7 @@ inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type)
CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _weights_to_qasymm8(std::make_unique<CLDepthConvertLayerKernel>()),
+ _weights_to_qasymm8(std::make_unique<opencl::kernels::ClCastKernel>()),
_mm_native_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
_mm_reshaped_only_rhs_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
_mtx_b_reshape_kernel(std::make_unique<opencl::kernels::ClGemmReshapeRhsMatrixKernel>()),
@@ -272,7 +272,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
TensorInfo weights_info(*b->info());
weights_info.set_data_type(DataType::QASYMM8);
_qasymm8_weights.allocator()->init(weights_info);
- _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
+ _weights_to_qasymm8->configure(compile_context, b->info(), _qasymm8_weights.info(), ConvertPolicy::WRAP);
}
const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
@@ -480,7 +480,7 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
{
b_offset = -128;
weights_info.set_data_type(DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConvertLayerKernel::validate(b, &weights_info, ConvertPolicy::WRAP, 0));
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCastKernel::validate(b, &weights_info, ConvertPolicy::WRAP));
}
const ITensorInfo *matrix_b_info = &weights_info;
if(reshape_matrix_b)
@@ -681,7 +681,8 @@ void CLGEMMLowpMatrixMultiplyCore::prepare()
if(_convert_to_qasymm8)
{
_qasymm8_weights.allocator()->allocate();
- CLScheduler::get().enqueue(*_weights_to_qasymm8, false);
+ ITensorPack convert_to_qs8_pack = { { ACL_SRC, _original_b }, { ACL_DST, &_qasymm8_weights } };
+ CLScheduler::get().enqueue_op(*_weights_to_qasymm8, convert_to_qs8_pack, false);
}
if(_is_gemm_reshaped && _reshape_b_only_on_first_run)
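The prepare() change follows from the kernel becoming stateless: ClCastKernel no longer caches tensor pointers at configure() time, so the operands must travel with the enqueue call itself. The pattern, with placeholder tensor names standing in for the member tensors above:

    // 'kernel' is a configured stateless kernel; 'src'/'dst' are placeholder tensors.
    ITensorPack pack = { { ACL_SRC, &src }, { ACL_DST, &dst } };
    CLScheduler::get().enqueue_op(kernel, pack, false); // false: do not flush the queue yet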
diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp
index 146ac8f619..85d13c246e 100644
--- a/src/runtime/CL/functions/CLLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayer.cpp
@@ -29,7 +29,6 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index 69974424c9..a44dcd2e24 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -27,7 +27,6 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp
index 7b6ec8f5c8..fcf5b9d2a4 100644
--- a/src/runtime/CL/functions/CLQLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp
@@ -30,7 +30,6 @@
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp
index 45ced35782..755fa40121 100644
--- a/src/runtime/CL/functions/CLRNNLayer.cpp
+++ b/src/runtime/CL/functions/CLRNNLayer.cpp
@@ -28,7 +28,6 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/NEON/functions/NECast.cpp b/src/runtime/NEON/functions/NECast.cpp
index a42f512ce6..b519576ad5 100644
--- a/src/runtime/NEON/functions/NECast.cpp
+++ b/src/runtime/NEON/functions/NECast.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,23 +23,45 @@
*/
#include "arm_compute/runtime/NEON/functions/NECast.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
-
-#include <utility>
+#include "arm_compute/core/Validate.h"
+#include "src/runtime/cpu/operators/CpuCast.h"
namespace arm_compute
{
+struct NECast::Impl
+{
+ const ITensor *src{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuCast> op{ nullptr };
+};
+
+NECast::NECast()
+ : _impl(std::make_unique<Impl>())
+{
+}
+NECast::NECast(NECast &&) = default;
+NECast &NECast::operator=(NECast &&) = default;
+NECast::~NECast() = default;
+
void NECast::configure(ITensor *input, ITensor *output, ConvertPolicy policy)
{
- auto k = std::make_unique<NEDepthConvertLayerKernel>();
- k->configure(input, output, policy, 0);
- _kernel = std::move(k);
+ _impl->src = input;
+ _impl->dst = output;
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->src, _impl->dst);
+
+ _impl->op = std::make_unique<cpu::CpuCast>();
+ _impl->op->configure(_impl->src->info(), _impl->dst->info(), policy);
}
Status NECast::validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy)
{
- return NEDepthConvertLayerKernel::validate(input, output, policy, 0);
+ return cpu::CpuCast::validate(input, output, policy);
+}
+
+void NECast::run()
+{
+ ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+ _impl->op->run(pack);
}
} // namespace arm_compute
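Since NECast::validate() forwards straight to cpu::CpuCast::validate(), an unsupported conversion can be screened before any tensor is allocated. A sketch with assumed tensor infos:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NECast.h"
    #include <iostream>

    using namespace arm_compute;

    int main()
    {
        TensorInfo src_info(TensorShape(32U), 1, DataType::S16);
        TensorInfo dst_info(TensorShape(32U), 1, DataType::S32);

        const Status s = NECast::validate(&src_info, &dst_info, ConvertPolicy::SATURATE);
        if(s.error_code() != ErrorCode::OK)
        {
            std::cout << s.error_description() << std::endl; // conversion not supported
        }
        return 0;
    }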
diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
index 761de8eb60..07e985c25e 100644
--- a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,20 +23,51 @@
*/
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
-#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "arm_compute/core/Validate.h"
+#include "src/runtime/cpu/operators/CpuCast.h"
#include <utility>
-using namespace arm_compute;
+namespace arm_compute
+{
+struct NEDepthConvertLayer::Impl
+{
+ const ITensor *src{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuCast> op{ nullptr };
+};
+
+NEDepthConvertLayer::NEDepthConvertLayer()
+ : _impl(std::make_unique<Impl>())
+{
+}
+NEDepthConvertLayer::NEDepthConvertLayer(NEDepthConvertLayer &&) = default;
+NEDepthConvertLayer &NEDepthConvertLayer::operator=(NEDepthConvertLayer &&) = default;
+NEDepthConvertLayer::~NEDepthConvertLayer() = default;
void NEDepthConvertLayer::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
{
- auto k = std::make_unique<NEDepthConvertLayerKernel>();
- k->configure(input, output, policy, shift);
- _kernel = std::move(k);
+ ARM_COMPUTE_UNUSED(shift);
+
+ _impl->src = input;
+ _impl->dst = output;
+
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->src, _impl->dst);
+ ARM_COMPUTE_ERROR_ON(shift != 0);
+
+ _impl->op = std::make_unique<cpu::CpuCast>();
+ _impl->op->configure(_impl->src->info(), _impl->dst->info(), policy);
}
Status NEDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
{
- return NEDepthConvertLayerKernel::validate(input, output, policy, shift);
+ ARM_COMPUTE_RETURN_ERROR_ON(shift != 0);
+ return cpu::CpuCast::validate(input, output, policy);
+}
+
+void NEDepthConvertLayer::run()
+{
+ ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+ _impl->op->run(pack);
}
+} // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuCast.cpp b/src/runtime/cpu/operators/CpuCast.cpp
new file mode 100644
index 0000000000..5a4f6c518e
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuCast.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/cpu/operators/CpuCast.h"
+
+#include "src/core/cpu/kernels/CpuCastKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+void CpuCast::configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
+{
+ auto k = std::make_unique<kernels::CpuCastKernel>();
+ k->configure(src, dst, policy);
+ _kernel = std::move(k);
+}
+
+Status CpuCast::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
+{
+ return kernels::CpuCastKernel::validate(src, dst, policy);
+}
+} // namespace cpu
+} // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuCast.h b/src/runtime/cpu/operators/CpuCast.h
new file mode 100644
index 0000000000..2aea2d2b09
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuCast.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_CAST_H
+#define ARM_COMPUTE_CPU_CAST_H
+
+#include "src/runtime/cpu/ICpuOperator.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** Basic function to run @ref kernels::CpuCastKernel */
+class CpuCast : public ICpuOperator
+{
+public:
+ /** Constructor */
+ CpuCast() = default;
+ /** Configure operator for a given list of arguments
+ *
+ * Input data type must be different from the output data type.
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:-----------------------------------------------|
+ * |QASYMM8_SIGNED | S16, S32, F32, F16 |
+ * |QASYMM8 | U16, S16, S32, F32, F16 |
+ * |U8 | U16, S16, S32, F32, F16 |
+ * |U16 | U8, U32 |
+ * |S16 | QASYMM8_SIGNED, U8, S32 |
+ * |F16 | QASYMM8_SIGNED, QASYMM8, F32, S32, U8 |
+ * |S32 | QASYMM8_SIGNED, QASYMM8, F16, F32, U8 |
+ * |F32 | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8|
+ *
+ * @param[in] src The source tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32.
+ * @param[out] dst The destination tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/U32/S16/S32/BFLOAT16/F16/F32.
+ * @param[in] policy Conversion policy.
+ */
+ void configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
+ /** Static function to check if given info will lead to a valid configuration
+ *
+ * Similar to @ref CpuCast::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy);
+};
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_CAST_H */
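CpuCast is the backend-agnostic layer that NECast wraps: it is configured from ITensorInfo objects and run with an ITensorPack, so it holds no tensor state of its own. A sketch of driving it directly; note these are internal src/ headers rather than public API, and the shapes/types are assumptions:

    #include "arm_compute/runtime/Tensor.h"
    #include "src/runtime/cpu/operators/CpuCast.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::S32));
        src.allocator()->allocate();
        dst.allocator()->allocate();

        cpu::CpuCast cast;
        cast.configure(src.info(), dst.info(), ConvertPolicy::SATURATE);

        // Operands are supplied per run() call; the operator itself stays stateless.
        ITensorPack pack = { { ACL_SRC, &src }, { ACL_DST, &dst } };
        cast.run(pack);
        return 0;
    }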
diff --git a/src/runtime/gpu/cl/operators/ClCast.cpp b/src/runtime/gpu/cl/operators/ClCast.cpp
new file mode 100644
index 0000000000..3f54004aa7
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClCast.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClCast.h"
+
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/kernels/ClCastKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+void ClCast::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
+{
+ auto k = std::make_unique<kernels::ClCastKernel>();
+ k->configure(compile_context, src, dst, policy);
+ _kernel = std::move(k);
+}
+
+Status ClCast::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
+{
+ return kernels::ClCastKernel::validate(src, dst, policy);
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClCast.h b/src/runtime/gpu/cl/operators/ClCast.h
new file mode 100644
index 0000000000..69e028debd
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClCast.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_CAST_H
+#define ARM_COMPUTE_CL_CAST_H
+
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to run @ref kernels::ClCastKernel */
+class ClCast : public IClOperator
+{
+public:
+ /** Constructor */
+ ClCast() = default;
+ /** Configure operator for a given list of arguments
+ *
+ * @note Input data type must be different from the output data type.
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------------------------------|
+ * |U8 | S8, U16, S16, U32, S32, F16, F32 |
+ * |U16 | U8, S8, S16, U32, S32, F16, F32 |
+ * |S16 | U8, S8, U16, U32, S32, F16, F32 |
+ * |U32 | U8, S8, U16, S16, S32, F16, F32 |
+ * |S32 | U8, S8, U16, S16, U32, F16, F32 |
+ * |F16 | U8, S8, U16, S16, U32, F32 |
+ * |F32 | U8, S8, U16, S16, U32, F16 |
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src The source tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[out] dst The destination tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy.
+ */
+ void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
+ /** Static function to check if given info will lead to a valid configuration
+ *
+ * Similar to @ref ClCast::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy);
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_CAST_H */
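The GPU operator mirrors CpuCast but threads a compile context through configure() so the kernel is built against the right CL context. A fragment mirroring what CLCast does internally; src and dst stand for already-initialised CLTensor objects, and the policy is an arbitrary choice:

    // Assumes CLScheduler::get().default_init() has run and src/dst are allocated CLTensors.
    opencl::ClCast cast;
    cast.configure(CLKernelLibrary::get().get_compile_context(), src.info(), dst.info(), ConvertPolicy::WRAP);

    ITensorPack pack = { { ACL_SRC, &src }, { ACL_DST, &dst } };
    cast.run(pack);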