diff options
author | Teresa Charlin <teresa.charlinreyes@arm.com> | 2021-03-04 15:24:45 +0000 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-03-23 10:05:15 +0000 |
commit | d1dc09c95602ec1506bb4934aed1792752b5ffcf (patch) | |
tree | 8e3d337f4fc1bbc77b522a3e5b9ce49817fc85bf /src/runtime/NEON | |
parent | 226169fef38e2361f6b503570645c802c513112d (diff) | |
download | ComputeLibrary-d1dc09c95602ec1506bb4934aed1792752b5ffcf.tar.gz |
Port CpuTranspose to new API
Partially Resolves: COMPMID-4277 (2/2)
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Id8ee520081fe905cb796d4376864fa84ac384caa
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/303714
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5217
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON')
-rw-r--r-- | src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 37 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEPermute.cpp | 4 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NETranspose.cpp | 43 |
3 files changed, 65 insertions, 19 deletions
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index ec782fc163..0a5318ac30 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,6 +24,7 @@
 #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
 
 #include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorPack.h"
 #include "arm_compute/core/Size2D.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
@@ -39,9 +40,8 @@
 #include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
 #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
 #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
-#include "src/core/NEON/kernels/NETransposeKernel.h"
+#include "src/core/cpu/kernels/CpuTransposeKernel.h"
 
-#include <algorithm>
 #include <cmath>
 
 namespace arm_compute
@@ -142,16 +142,39 @@ Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const I
 }
 } // namespace
 
+struct NEFullyConnectedLayerReshapeWeights::Impl
+{
+    const ITensor *src{ nullptr };
+    ITensor *dst{ nullptr };
+    std::unique_ptr<cpu::kernels::CpuTransposeKernel> op{ nullptr };
+};
+
+NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights()
+    : _impl(std::make_unique<Impl>())
+{
+}
+
+NEFullyConnectedLayerReshapeWeights::~NEFullyConnectedLayerReshapeWeights() = default;
+
 void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output)
 {
-    auto k = std::make_unique<NETransposeKernel>();
-    k->configure(input, output);
-    _kernel = std::move(k);
+    _impl->op = std::make_unique<cpu::kernels::CpuTransposeKernel>();
+    _impl->op->configure(input->info(), output->info());
+    _impl->src = input;
+    _impl->dst = output;
 }
 
 Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
-    return NETransposeKernel::validate(input, output);
+    return cpu::kernels::CpuTransposeKernel::validate(input, output);
+}
+
+void NEFullyConnectedLayerReshapeWeights::run()
+{
+    ITensorPack pack{};
+    pack.add_tensor(TensorType::ACL_SRC, _impl->src);
+    pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+    NEScheduler::get().schedule_op(_impl->op.get(), Window::DimY, _impl->op->window(), pack);
 }
 
 NEFullyConnectedLayer::~NEFullyConnectedLayer() = default;
diff --git a/src/runtime/NEON/functions/NEPermute.cpp b/src/runtime/NEON/functions/NEPermute.cpp
index 257c1a2e44..f707fad757 100644
--- a/src/runtime/NEON/functions/NEPermute.cpp
+++ b/src/runtime/NEON/functions/NEPermute.cpp
@@ -40,10 +40,6 @@ NEPermute::NEPermute()
 {
 }
 
-NEPermute::NEPermute(NEPermute &&) = default;
-
-NEPermute &NEPermute::operator=(NEPermute &&) = default;
-
 NEPermute::~NEPermute() = default;
 
 void NEPermute::configure(const ITensor *input, ITensor *output, const PermutationVector &perm)
diff --git a/src/runtime/NEON/functions/NETranspose.cpp b/src/runtime/NEON/functions/NETranspose.cpp
index aaa52e36b9..3b3023f3b3 100644
--- a/src/runtime/NEON/functions/NETranspose.cpp
+++ b/src/runtime/NEON/functions/NETranspose.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,21 +23,48 @@
  */
 #include "arm_compute/runtime/NEON/functions/NETranspose.h"
 
-#include "src/core/NEON/kernels/NETransposeKernel.h"
-
-#include <utility>
+#include "arm_compute/core/Validate.h"
+#include "src/runtime/cpu/operators/CpuTranspose.h"
 
 namespace arm_compute
 {
+struct NETranspose::Impl
+{
+    const ITensor *src{ nullptr };
+    ITensor *dst{ nullptr };
+    std::unique_ptr<cpu::CpuTranspose> op{ nullptr };
+};
+
+NETranspose::NETranspose()
+    : _impl(std::make_unique<Impl>())
+{
+}
+
+NETranspose::~NETranspose() = default;
+
 void NETranspose::configure(const ITensor *input, ITensor *output)
 {
-    auto k = std::make_unique<NETransposeKernel>();
-    k->configure(input, output);
-    _kernel = std::move(k);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    _impl->src = input;
+    _impl->dst = output;
+    _impl->op = std::make_unique<cpu::CpuTranspose>();
+    _impl->op->configure(input->info(), output->info());
 }
 
 Status NETranspose::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
-    return NETransposeKernel::validate(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_RETURN_ON_ERROR(cpu::CpuTranspose::validate(input, output));
+    return Status{};
+}
+
+void NETranspose::run()
+{
+    ITensorPack pack;
+    pack.add_tensor(TensorType::ACL_SRC, _impl->src);
+    pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+    _impl->op->run(pack);
 }
+
 } // namespace arm_compute