diff options
Diffstat (limited to 'src/runtime/CL/functions/CLPixelWiseMultiplication.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLPixelWiseMultiplication.cpp | 147 |
1 files changed, 106 insertions, 41 deletions
diff --git a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
index 3c1a7de76d..6aa9d9cbb3 100644
--- a/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
+++ b/src/runtime/CL/functions/CLPixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 ARM Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,67 +24,132 @@
 #include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
 
 #include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
-#include "support/MemorySupport.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include "src/core/CL/ICLKernel.h"
+#include "src/gpu/cl/operators/ClMul.h"
 
 #include <utility>
 
 namespace arm_compute
 {
-void CLPixelWiseMultiplication::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale,
-                                          ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
+struct CLPixelWiseMultiplication::Impl
+{
+    const ICLTensor *src_0{nullptr};
+    const ICLTensor *src_1{nullptr};
+    ICLTensor *dst{nullptr};
+    std::unique_ptr<opencl::ClMul> op{nullptr};
+};
+
+CLPixelWiseMultiplication::CLPixelWiseMultiplication() : _impl(std::make_unique<Impl>())
+{
+}
+CLPixelWiseMultiplication::CLPixelWiseMultiplication(CLPixelWiseMultiplication &&) = default;
+CLPixelWiseMultiplication &CLPixelWiseMultiplication::operator=(CLPixelWiseMultiplication &&) = default;
+CLPixelWiseMultiplication::~CLPixelWiseMultiplication() = default;
+
+void CLPixelWiseMultiplication::configure(ICLTensor *input1,
+                                          ICLTensor *input2,
+                                          ICLTensor *output,
+                                          float scale,
+                                          ConvertPolicy overflow_policy,
+                                          RoundingPolicy rounding_policy,
+                                          const ActivationLayerInfo &act_info)
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, scale, overflow_policy,
+              rounding_policy, act_info);
+}
+
+void CLPixelWiseMultiplication::configure(const CLCompileContext &compile_context,
+                                          ICLTensor *input1,
+                                          ICLTensor *input2,
+                                          ICLTensor *output,
+                                          float scale,
+                                          ConvertPolicy overflow_policy,
+                                          RoundingPolicy rounding_policy,
+                                          const ActivationLayerInfo &act_info)
 {
-    configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, scale, overflow_policy, rounding_policy, act_info);
+    _impl->src_0 = input1;
+    _impl->src_1 = input2;
+    _impl->dst = output;
+    _impl->op = std::make_unique<opencl::ClMul>();
+    _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), scale, overflow_policy,
+                         rounding_policy, act_info);
 }
 
-void CLPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale,
-                                          ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
+Status CLPixelWiseMultiplication::validate(const ITensorInfo *input1,
+                                           const ITensorInfo *input2,
+                                           const ITensorInfo *output,
+                                           float scale,
+                                           ConvertPolicy overflow_policy,
+                                           RoundingPolicy rounding_policy,
+                                           const ActivationLayerInfo &act_info)
 {
-    auto k = arm_compute::support::cpp14::make_unique<CLPixelWiseMultiplicationKernel>();
-    k->configure(compile_context, input1, input2, output, scale, overflow_policy, rounding_policy, act_info);
-    _kernel = std::move(k);
-
-    if(output->info()->dimension(0) > 1)
-    {
-        ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
-
-        if(broadcasted_info->info()->dimension(0) == 1)
-        {
-            _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
-        }
-    }
+    return opencl::ClMul::validate(input1, input2, output, scale, overflow_policy, rounding_policy, act_info);
 }
 
-Status CLPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale,
-                                           ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
+void CLPixelWiseMultiplication::run()
 {
-    return CLPixelWiseMultiplicationKernel::validate(input1, input2, output, scale, overflow_policy, rounding_policy, act_info);
+    ITensorPack pack;
+    pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
+    pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
+    pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+
+    _impl->op->run(pack);
 }
 
-void CLComplexPixelWiseMultiplication::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+struct CLComplexPixelWiseMultiplication::Impl
+{
+    const ICLTensor *src_0{nullptr};
+    const ICLTensor *src_1{nullptr};
+    ICLTensor *dst{nullptr};
+    std::unique_ptr<opencl::ClComplexMul> op{nullptr};
+};
+
+CLComplexPixelWiseMultiplication::CLComplexPixelWiseMultiplication() : _impl(std::make_unique<Impl>())
+{
+}
+CLComplexPixelWiseMultiplication::CLComplexPixelWiseMultiplication(CLComplexPixelWiseMultiplication &&) = default;
+CLComplexPixelWiseMultiplication &
+CLComplexPixelWiseMultiplication::operator=(CLComplexPixelWiseMultiplication &&) = default;
+CLComplexPixelWiseMultiplication::~CLComplexPixelWiseMultiplication() = default;
+
+void CLComplexPixelWiseMultiplication::configure(ICLTensor *input1,
+                                                 ICLTensor *input2,
+                                                 ICLTensor *output,
+                                                 const ActivationLayerInfo &act_info)
 {
     configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info);
 }
 
-void CLComplexPixelWiseMultiplication::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+void CLComplexPixelWiseMultiplication::configure(const CLCompileContext &compile_context,
+                                                 ICLTensor *input1,
+                                                 ICLTensor *input2,
+                                                 ICLTensor *output,
+                                                 const ActivationLayerInfo &act_info)
 {
-    auto k = arm_compute::support::cpp14::make_unique<CLComplexPixelWiseMultiplicationKernel>();
-    k->configure(compile_context, input1, input2, output, act_info);
-    _kernel = std::move(k);
-
-    if(output->info()->dimension(0) > 1)
-    {
-        ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
-
-        if(broadcasted_info->info()->dimension(0) == 1)
-        {
-            _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
-        }
-    }
+    _impl->src_0 = input1;
+    _impl->src_1 = input2;
+    _impl->dst = output;
+    _impl->op = std::make_unique<opencl::ClComplexMul>();
+    _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), act_info);
 }
 
-Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+Status CLComplexPixelWiseMultiplication::validate(const ITensorInfo *input1,
+                                                  const ITensorInfo *input2,
+                                                  const ITensorInfo *output,
+                                                  const ActivationLayerInfo &act_info)
 {
-    return CLComplexPixelWiseMultiplicationKernel::validate(input1, input2, output, act_info);
+    return opencl::ClComplexMul::validate(input1, input2, output, act_info);
+}
+
+void CLComplexPixelWiseMultiplication::run()
+{
+    ITensorPack pack;
+    pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
+    pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
+    pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+
+    _impl->op->run(pack);
 }
 } // namespace arm_compute
|