aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/operators/CpuActivation.cpp
diff options
context:
space:
mode:
author: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> 2022-09-20 11:49:23 +0100
committer: Mohmun02 <MohammedSuhail.Munshi@arm.com> 2022-10-12 09:27:42 +0000
commit: fa79fda2c797282de3589aaa69b06e065e8a21e0 (patch)
tree: 91020783a702c9a3be16dad076bca10321fe04c2 /src/cpu/operators/CpuActivation.cpp
parent: c8cc024603cb1db084227196a52e562bf251d339 (diff)
download: ComputeLibrary-fa79fda2c797282de3589aaa69b06e065e8a21e0.tar.gz
Optimize Neon™ Logistic Activation
- Use a 1d execution window to improve memory access pattern.

Resolves: [COMPMID-5465]
Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Ida30669ffa06eb002ca43a6edf15e25a6eaad2f6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8344
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/operators/CpuActivation.cpp')
-rw-r--r-- src/cpu/operators/CpuActivation.cpp 10
1 files changed, 9 insertions, 1 deletions
diff --git a/src/cpu/operators/CpuActivation.cpp b/src/cpu/operators/CpuActivation.cpp
index 3945fa59a5..197e9850b9 100644
--- a/src/cpu/operators/CpuActivation.cpp
+++ b/src/cpu/operators/CpuActivation.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,6 +23,7 @@
*/
#include "src/cpu/operators/CpuActivation.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/common/IOperator.h"
#include "src/common/utils/LegacySupport.h"
#include "src/common/utils/Log.h"
@@ -46,6 +47,13 @@ Status CpuActivation::validate(const ITensorInfo *input, const ITensorInfo *outp
return kernels::CpuActivationKernel::validate(input, output, activation_info);
}
+void CpuActivation::run(ITensorPack &tensors) // Runs the operator's kernel on the tensors supplied in the pack.
+{
+ ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); // Reject an empty tensor pack before scheduling anything.
+ auto split_dimension = static_cast<kernels::CpuActivationKernel *>(_kernel.get())->get_split_dimension_hint(); // Ask the kernel for its split-dimension hint; the static_cast is unchecked and assumes _kernel holds a CpuActivationKernel.
+ NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors); // Pass the kernel, the hint, the kernel's own window and the tensors to the NEScheduler singleton.
+}
+
std::tuple<IOperator *, StatusCode> CpuContext::create_activation(const AclTensorDescriptor &src, const AclTensorDescriptor &dst, const AclActivationDescriptor &act, bool is_validate)
{
TensorInfo src_info = detail::convert_to_legacy_tensor_info(src);