diff options
author | Teresa Charlin <teresa.charlinreyes@arm.com> | 2021-02-25 20:15:01 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-03-29 20:23:11 +0000 |
commit | 2788609b8a10306e9eae47543b39812a7b075aaa (patch) | |
tree | 81515046e0c06d6a21ecdcebfe083ea5922fea0c /src/runtime/CL/functions/CLFullyConnectedLayer.cpp | |
parent | f9a611a1fd309bb9a906c99eede5e6b7bceba26b (diff) | |
download | ComputeLibrary-2788609b8a10306e9eae47543b39812a7b075aaa.tar.gz |
Port ClTranspose to new API
Partially Resolves: COMPMID-4277 (1/2)
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I704c2303135cbe1ba46d2fd5642c84c562204bc7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5194
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLFullyConnectedLayer.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 36 |
1 file changed, 30 insertions, 6 deletions
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 46a90a54b7..176c5bebd5 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,7 +40,7 @@ #include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" #include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" -#include "src/core/CL/kernels/CLTransposeKernel.h" +#include "src/core/gpu/cl/kernels/ClTransposeKernel.h" #include "support/Cast.h" #include <algorithm> @@ -141,6 +141,20 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I } } // namespace +struct CLFullyConnectedLayerReshapeWeights::Impl +{ + const ITensor *src{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<opencl::kernels::ClTransposeKernel> op{ nullptr }; +}; + +CLFullyConnectedLayerReshapeWeights::CLFullyConnectedLayerReshapeWeights() + : _impl(std::make_unique<Impl>()) +{ +} + +CLFullyConnectedLayerReshapeWeights::~CLFullyConnectedLayerReshapeWeights() = default; + void CLFullyConnectedLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -148,14 +162,23 @@ void CLFullyConnectedLayerReshapeWeights::configure(const ICLTensor *input, ICLT void CLFullyConnectedLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - auto k = std::make_unique<CLTransposeKernel>(); - k->configure(compile_context, input, output); - _kernel = std::move(k); + _impl->src = input; + _impl->dst = output; + _impl->op = std::make_unique<opencl::kernels::ClTransposeKernel>(); + _impl->op->configure(compile_context, 
_impl->src->info(), _impl->dst->info()); } Status CLFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, const ITensorInfo *output) { - return CLTransposeKernel::validate(input, output); + return opencl::kernels::ClTransposeKernel::validate(input, output); +} + +void CLFullyConnectedLayerReshapeWeights::run() +{ + ITensorPack pack{}; + pack.add_tensor(TensorType::ACL_SRC, _impl->src); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + CLScheduler::get().enqueue_op(*_impl->op.get(), pack, false); } CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager) @@ -164,6 +187,7 @@ CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> mem _are_weights_reshaped(true), _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr) { } + void CLFullyConnectedLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info) { |