author    | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-04-28 10:20:18 +0100
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-05-19 11:38:32 +0000
commit    | 11d8415aa57b69fb6c83e86a37e3026c22d1d37d (patch)
tree      | 8f6bb12011ddc7275a8cc071dbf8ffe90a88e8eb /src/runtime
parent    | 856f66e6c61b77d03f754cd0fa8439891f0e4aca (diff)
download  | ComputeLibrary-11d8415aa57b69fb6c83e86a37e3026c22d1d37d.tar.gz
Port DepthConvert to new API

- Renames DepthConvert to Cast
- Ports both the NEDepthConvertLayer and CLDepthConvertLayer variants
- Removes the legacy shift capability from DepthConvert, allowing only shifts of 0
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I806a0f8eb23d23502b632c529fda7edde19c8176
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5565
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
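
From the caller's side, the ported functions keep their public interface but now reject non-zero shifts and dispatch to a backing operator when run() is called. Below is a minimal, hypothetical usage sketch of the new NECast path; the tensor shape, data types, and allocation steps are illustrative assumptions and not part of this patch.

    // Hypothetical example: convert a U8 tensor to F32 with the ported NECast.
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NECast.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));

        NECast cast;
        cast.configure(&src, &dst, ConvertPolicy::SATURATE); // no shift argument: Cast only supports shift == 0

        src.allocator()->allocate();
        dst.allocator()->allocate();

        cast.run(); // internally packs {ACL_SRC, ACL_DST} into an ITensorPack and runs cpu::CpuCast
        return 0;
    }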
Diffstat (limited to 'src/runtime')
-rw-r--r-- | src/runtime/CL/functions/CLCast.cpp                       | 41
-rw-r--r-- | src/runtime/CL/functions/CLDepthConvertLayer.cpp          | 45
-rw-r--r-- | src/runtime/CL/functions/CLFullyConnectedLayer.cpp        |  1
-rw-r--r-- | src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp       |  1
-rw-r--r-- | src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp     |  1
-rw-r--r-- | src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 11
-rw-r--r-- | src/runtime/CL/functions/CLLSTMLayer.cpp                  |  1
-rw-r--r-- | src/runtime/CL/functions/CLLSTMLayerQuantized.cpp         |  1
-rw-r--r-- | src/runtime/CL/functions/CLQLSTMLayer.cpp                 |  1
-rw-r--r-- | src/runtime/CL/functions/CLRNNLayer.cpp                   |  1
-rw-r--r-- | src/runtime/NEON/functions/NECast.cpp                     | 42
-rw-r--r-- | src/runtime/NEON/functions/NEDepthConvertLayer.cpp        | 45
-rw-r--r-- | src/runtime/cpu/operators/CpuCast.cpp                     | 44
-rw-r--r-- | src/runtime/cpu/operators/CpuCast.h                       | 73
-rw-r--r-- | src/runtime/gpu/cl/operators/ClCast.cpp                   | 45
-rw-r--r-- | src/runtime/gpu/cl/operators/ClCast.h                     | 74
16 files changed, 386 insertions, 41 deletions
diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp
index 202140d8b9..53256ebed4 100644
--- a/src/runtime/CL/functions/CLCast.cpp
+++ b/src/runtime/CL/functions/CLCast.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,12 +23,31 @@
  */
 #include "arm_compute/runtime/CL/functions/CLCast.h"
 
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClCast.h"
 
 #include <utility>
 
 namespace arm_compute
 {
+struct CLCast::Impl
+{
+    const ICLTensor *src{ nullptr };
+    ICLTensor       *dst{ nullptr };
+    std::unique_ptr<opencl::ClCast> op{ nullptr };
+};
+
+CLCast::CLCast()
+    : _impl(std::make_unique<Impl>())
+{
+}
+CLCast::CLCast(CLCast &&) = default;
+CLCast &CLCast::operator=(CLCast &&) = default;
+CLCast::~CLCast() = default;
+
 void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
 {
     configure(CLKernelLibrary::get().get_compile_context(), input, output, policy);
@@ -36,13 +55,23 @@ void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy
 
 void CLCast::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
 {
-    auto k = std::make_unique<CLDepthConvertLayerKernel>();
-    k->configure(compile_context, input, output, policy, 0);
-    _kernel = std::move(k);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    _impl->src = input;
+    _impl->dst = output;
+
+    _impl->op = std::make_unique<opencl::ClCast>();
+    _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), policy);
 }
 
 Status CLCast::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy)
 {
-    return CLDepthConvertLayerKernel::validate(input, output, policy, 0);
+    return opencl::ClCast::validate(input, output, policy);
+}
+
+void CLCast::run()
+{
+    ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+    _impl->op->run(pack);
 }
 } // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
index 47bc52364d..6aa370b23c 100644
--- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,12 +23,31 @@
  */
 #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
 
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Validate.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClCast.h"
 
 #include <utility>
 
 namespace arm_compute
 {
+struct CLDepthConvertLayer::Impl
+{
+    const ICLTensor *src{ nullptr };
+    ICLTensor       *dst{ nullptr };
+    std::unique_ptr<opencl::ClCast> op{ nullptr };
+};
+
+CLDepthConvertLayer::CLDepthConvertLayer()
+    : _impl(std::make_unique<Impl>())
+{
+}
+CLDepthConvertLayer::CLDepthConvertLayer(CLDepthConvertLayer &&) = default;
+CLDepthConvertLayer &CLDepthConvertLayer::operator=(CLDepthConvertLayer &&) = default;
+CLDepthConvertLayer::~CLDepthConvertLayer() = default;
+
 void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
 {
     configure(CLKernelLibrary::get().get_compile_context(), input, output, policy, shift);
@@ -36,13 +55,27 @@ void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, C
 
 void CLDepthConvertLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
 {
-    auto k = std::make_unique<CLDepthConvertLayerKernel>();
-    k->configure(compile_context, input, output, policy, shift);
-    _kernel = std::move(k);
+    ARM_COMPUTE_UNUSED(shift);
+
+    _impl->src = input;
+    _impl->dst = output;
+
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->src, _impl->dst);
+    ARM_COMPUTE_ERROR_ON(shift != 0);
+
+    _impl->op = std::make_unique<opencl::ClCast>();
+    _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), policy);
 }
 
 Status CLDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
 {
-    return CLDepthConvertLayerKernel::validate(input, output, policy, shift);
+    ARM_COMPUTE_RETURN_ERROR_ON(shift != 0);
+    return opencl::ClCast::validate(input, output, policy);
+}
+
+void CLDepthConvertLayer::run()
+{
+    ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+    _impl->op->run(pack);
 }
 } // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 991472bb7a..50a145f9ca 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -28,7 +28,6 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 5dc7556b2f..3184d5dfe0 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -31,7 +31,6 @@
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLCol2ImKernel.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
index 7a01018f59..d5d1b5f41e 100644
--- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
@@ -29,7 +29,6 @@
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 099a2c980f..3be09581bd 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -34,12 +34,12 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/gpu/cl/kernels/ClCastKernel.h"
 #include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
@@ -189,7 +189,7 @@ inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type)
 
 CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)),
-      _weights_to_qasymm8(std::make_unique<CLDepthConvertLayerKernel>()),
+      _weights_to_qasymm8(std::make_unique<opencl::kernels::ClCastKernel>()),
       _mm_native_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
       _mm_reshaped_only_rhs_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
       _mtx_b_reshape_kernel(std::make_unique<opencl::kernels::ClGemmReshapeRhsMatrixKernel>()),
@@ -272,7 +272,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
             TensorInfo weights_info(*b->info());
             weights_info.set_data_type(DataType::QASYMM8);
             _qasymm8_weights.allocator()->init(weights_info);
-            _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
+            _weights_to_qasymm8->configure(compile_context, b->info(), _qasymm8_weights.info(), ConvertPolicy::WRAP);
         }
 
         const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
@@ -480,7 +480,7 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
     {
         b_offset = -128;
         weights_info.set_data_type(DataType::QASYMM8);
-        ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConvertLayerKernel::validate(b, &weights_info, ConvertPolicy::WRAP, 0));
+        ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCastKernel::validate(b, &weights_info, ConvertPolicy::WRAP));
     }
     const ITensorInfo *matrix_b_info = &weights_info;
     if(reshape_matrix_b)
@@ -681,7 +681,8 @@ void CLGEMMLowpMatrixMultiplyCore::prepare()
         if(_convert_to_qasymm8)
         {
             _qasymm8_weights.allocator()->allocate();
-            CLScheduler::get().enqueue(*_weights_to_qasymm8, false);
+            ITensorPack convert_to_qs8_pack = { { ACL_SRC, _original_b }, { ACL_DST, &_qasymm8_weights } };
+            CLScheduler::get().enqueue_op(*_weights_to_qasymm8, convert_to_qs8_pack, false);
         }
 
         if(_is_gemm_reshaped && _reshape_b_only_on_first_run)
diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp
index 146ac8f619..85d13c246e 100644
--- a/src/runtime/CL/functions/CLLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayer.cpp
@@ -29,7 +29,6 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index 69974424c9..a44dcd2e24 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -27,7 +27,6 @@
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp
index 7b6ec8f5c8..fcf5b9d2a4 100644
--- a/src/runtime/CL/functions/CLQLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp
@@ -30,7 +30,6 @@
 #include "arm_compute/core/utils/misc/InfoHelpers.h"
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp
index 45ced35782..755fa40121 100644
--- a/src/runtime/CL/functions/CLRNNLayer.cpp
+++ b/src/runtime/CL/functions/CLRNNLayer.cpp
@@ -28,7 +28,6 @@
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
diff --git a/src/runtime/NEON/functions/NECast.cpp b/src/runtime/NEON/functions/NECast.cpp
index a42f512ce6..b519576ad5 100644
--- a/src/runtime/NEON/functions/NECast.cpp
+++ b/src/runtime/NEON/functions/NECast.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,23 +23,45 @@
  */
 #include "arm_compute/runtime/NEON/functions/NECast.h"
 
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
-
-#include <utility>
+#include "arm_compute/core/Validate.h"
+#include "src/runtime/cpu/operators/CpuCast.h"
 
 namespace arm_compute
 {
+struct NECast::Impl
+{
+    const ITensor *src{ nullptr };
+    ITensor       *dst{ nullptr };
+    std::unique_ptr<cpu::CpuCast> op{ nullptr };
+};
+
+NECast::NECast()
+    : _impl(std::make_unique<Impl>())
+{
+}
+NECast::NECast(NECast &&) = default;
+NECast &NECast::operator=(NECast &&) = default;
+NECast::~NECast() = default;
+
 void NECast::configure(ITensor *input, ITensor *output, ConvertPolicy policy)
 {
-    auto k = std::make_unique<NEDepthConvertLayerKernel>();
-    k->configure(input, output, policy, 0);
-    _kernel = std::move(k);
+    _impl->src = input;
+    _impl->dst = output;
+
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->src, _impl->dst);
+
+    _impl->op = std::make_unique<cpu::CpuCast>();
+    _impl->op->configure(_impl->src->info(), _impl->dst->info(), policy);
 }
 
 Status NECast::validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy)
 {
-    return NEDepthConvertLayerKernel::validate(input, output, policy, 0);
+    return cpu::CpuCast::validate(input, output, policy);
+}
+
+void NECast::run()
+{
+    ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+    _impl->op->run(pack);
 }
 } // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
index 761de8eb60..07e985c25e 100644
--- a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,20 +23,51 @@
  */
 #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
 
-#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "arm_compute/core/Validate.h"
+#include "src/runtime/cpu/operators/CpuCast.h"
 
 #include <utility>
 
-using namespace arm_compute;
+namespace arm_compute
+{
+struct NEDepthConvertLayer::Impl
+{
+    const ITensor *src{ nullptr };
+    ITensor       *dst{ nullptr };
+    std::unique_ptr<cpu::CpuCast> op{ nullptr };
+};
+
+NEDepthConvertLayer::NEDepthConvertLayer()
+    : _impl(std::make_unique<Impl>())
+{
+}
+NEDepthConvertLayer::NEDepthConvertLayer(NEDepthConvertLayer &&) = default;
+NEDepthConvertLayer &NEDepthConvertLayer::operator=(NEDepthConvertLayer &&) = default;
+NEDepthConvertLayer::~NEDepthConvertLayer() = default;
 
 void NEDepthConvertLayer::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
 {
-    auto k = std::make_unique<NEDepthConvertLayerKernel>();
-    k->configure(input, output, policy, shift);
-    _kernel = std::move(k);
+    ARM_COMPUTE_UNUSED(shift);
+
+    _impl->src = input;
+    _impl->dst = output;
+
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->src, _impl->dst);
+    ARM_COMPUTE_ERROR_ON(shift != 0);
+
+    _impl->op = std::make_unique<cpu::CpuCast>();
+    _impl->op->configure(_impl->src->info(), _impl->dst->info(), policy);
 }
 
 Status NEDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
 {
-    return NEDepthConvertLayerKernel::validate(input, output, policy, shift);
+    ARM_COMPUTE_RETURN_ERROR_ON(shift != 0);
+    return cpu::CpuCast::validate(input, output, policy);
+}
+
+void NEDepthConvertLayer::run()
+{
+    ITensorPack pack = { { ACL_SRC, _impl->src }, { ACL_DST, _impl->dst } };
+    _impl->op->run(pack);
 }
+} // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuCast.cpp b/src/runtime/cpu/operators/CpuCast.cpp
new file mode 100644
index 0000000000..5a4f6c518e
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuCast.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/cpu/operators/CpuCast.h"
+
+#include "src/core/cpu/kernels/CpuCastKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+void CpuCast::configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
+{
+    auto k = std::make_unique<kernels::CpuCastKernel>();
+    k->configure(src, dst, policy);
+    _kernel = std::move(k);
+}
+
+Status CpuCast::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
+{
+    return kernels::CpuCastKernel::validate(src, dst, policy);
+}
+} // namespace cpu
+} // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuCast.h b/src/runtime/cpu/operators/CpuCast.h
new file mode 100644
index 0000000000..2aea2d2b09
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuCast.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_CAST_H
+#define ARM_COMPUTE_CPU_CAST_H
+
+#include "src/runtime/cpu/ICpuOperator.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** Basic function to run @ref kernels::CpuCastKernel */
+class CpuCast : public ICpuOperator
+{
+public:
+    /** Constructor */
+    CpuCast() = default;
+    /** Configure operator for a given list of arguments
+     *
+     * Input data type must be different than output data type.
+     *
+     * Valid data layouts:
+     * - All
+     *
+     * Valid data type configurations:
+     * |src            |dst                                             |
+     * |:--------------|:-----------------------------------------------|
+     * |QASYMM8_SIGNED | S16, S32, F32, F16                             |
+     * |QASYMM8        | U16, S16, S32, F32, F16                        |
+     * |U8             | U16, S16, S32, F32, F16                        |
+     * |U16            | U8, U32                                        |
+     * |S16            | QASYMM8_SIGNED, U8, S32                        |
+     * |F16            | QASYMM8_SIGNED, QASYMM8, F32, S32, U8          |
+     * |S32            | QASYMM8_SIGNED, QASYMM8, F16, F32, U8          |
+     * |F32            | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8|
+     *
+     * @param[in]  src    The source tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[out] dst    The destination tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy Conversion policy.
+     */
+    void configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
+    /** Static function to check if given info will lead to a valid configuration
+     *
+     * Similar to @ref CpuCast::configure()
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy);
+};
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_CAST_H */
diff --git a/src/runtime/gpu/cl/operators/ClCast.cpp b/src/runtime/gpu/cl/operators/ClCast.cpp
new file mode 100644
index 0000000000..3f54004aa7
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClCast.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClCast.h"
+
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/kernels/ClCastKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+void ClCast::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
+{
+    auto k = std::make_unique<kernels::ClCastKernel>();
+    k->configure(compile_context, src, dst, policy);
+    _kernel = std::move(k);
+}
+
+Status ClCast::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
+{
+    return kernels::ClCastKernel::validate(src, dst, policy);
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClCast.h b/src/runtime/gpu/cl/operators/ClCast.h
new file mode 100644
index 0000000000..69e028debd
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClCast.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_CAST_H
+#define ARM_COMPUTE_CL_CAST_H
+
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to run @ref kernels::ClCastKernel */
+class ClCast : public IClOperator
+{
+public:
+    /** Constructor */
+    ClCast() = default;
+    /** Configure operator for a given list of arguments
+     *
+     * @note Input data type must be different than output data type.
+     *
+     * Valid data layouts:
+     * - All
+     *
+     * Valid data type configurations:
+     * |src  |dst                               |
+     * |:----|:--------------------------------------|
+     * |U8   | S8, U16, S16, U32, S32, F16, F32 |
+     * |U16  | U8, S8, S16, U32, S32, F16, F32  |
+     * |S16  | U8, S8, U16, U32, S32, F16, F32  |
+     * |U32  | U8, S8, U16, S16, S32, F16, F32  |
+     * |S32  | U8, S8, U16, S16, U32, F16, F32  |
+     * |F16  | U8, S8, U16, S16, U32, F32       |
+     * |F32  | U8, S8, U16, S16, U32, F16       |
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src             The source tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[out] dst             The destination tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy          Conversion policy.
+     */
+    void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
+    /** Static function to check if given info will lead to a valid configuration
+     *
+     * Similar to @ref ClCast::configure()
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy);
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_CAST_H */
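
The same split is visible at the operator level: ClCast and CpuCast are configured from ITensorInfo objects only, and the actual tensors are bound per run through an ITensorPack, which is what lets CLGEMMLowpMatrixMultiplyCore::prepare() above enqueue the cast kernel with enqueue_op(). A rough sketch of driving the internal cpu::CpuCast operator directly follows; these are internal headers and the tensor setup is an illustrative assumption, not an example shipped with this patch.

    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/experimental/Types.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "src/runtime/cpu/operators/CpuCast.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S16));
        dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32));
        src.allocator()->allocate();
        dst.allocator()->allocate();

        cpu::CpuCast cast_op;
        cast_op.configure(src.info(), dst.info(), ConvertPolicy::SATURATE); // configure sees tensor infos only

        ITensorPack pack = { { ACL_SRC, &src }, { ACL_DST, &dst } };        // tensors are supplied at run time
        cast_op.run(pack);
        return 0;
    }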