aboutsummaryrefslogtreecommitdiff
path: root/src/backends/neon/workloads
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/neon/workloads')
-rw-r--r-- src/backends/neon/workloads/CMakeLists.txt | 2
-rw-r--r-- src/backends/neon/workloads/NeonGatherNdWorkload.cpp | 147
-rw-r--r-- src/backends/neon/workloads/NeonGatherNdWorkload.hpp | 41
-rw-r--r-- src/backends/neon/workloads/NeonWorkloads.hpp | 1
4 files changed, 191 insertions, 0 deletions
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index 33a18e38da..8953dc9d4d 100644
--- a/src/backends/neon/workloads/CMakeLists.txt
+++ b/src/backends/neon/workloads/CMakeLists.txt
@@ -56,6 +56,8 @@ list(APPEND armnnNeonBackendWorkloads_sources
NeonFullyConnectedWorkload.hpp
NeonGatherWorkload.cpp
NeonGatherWorkload.hpp
+ NeonGatherNdWorkload.cpp
+ NeonGatherNdWorkload.hpp
NeonInstanceNormalizationWorkload.cpp
NeonInstanceNormalizationWorkload.hpp
NeonL2NormalizationFloatWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonGatherNdWorkload.cpp b/src/backends/neon/workloads/NeonGatherNdWorkload.cpp
new file mode 100644
index 0000000000..00c66cf9be
--- /dev/null
+++ b/src/backends/neon/workloads/NeonGatherNdWorkload.cpp
@@ -0,0 +1,147 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonGatherNdWorkload.hpp"
+#include "NeonWorkloadUtils.hpp"
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+#include "backendsCommon/WorkloadUtils.hpp"
+
+namespace armnn
+{
+arm_compute::Status NeonGatherNdWorkloadValidate(const TensorInfo& paramInfo,
+                                                 const TensorInfo& indicesInfo,
+                                                 const TensorInfo& outputInfo)
+{
+    // GatherNd is checked here as a plain Gather over reshaped tensors; only NEGather::validate is consulted.
+    // The helper reports the sizes under the keys "K", "W" and "C" (plus "ND", which this check does not use).
+    std::map<std::string, unsigned int> gatherNdDims = CalculateGatherNdKeyIndices(paramInfo, indicesInfo);
+    const unsigned int dimK = gatherNdDims["K"];
+    const unsigned int dimW = gatherNdDims["W"];
+    const unsigned int dimC = gatherNdDims["C"];
+
+    // Params viewed as { K, C }.
+    armnn::TensorInfo flatParamsInfo = paramInfo;
+    flatParamsInfo.SetShape({ dimK, dimC });
+
+    // Indices viewed as { W }.
+    armnn::TensorInfo flatIndicesInfo = indicesInfo;
+    flatIndicesInfo.SetShape({ dimW });
+
+    // Output viewed with the shape Gather produces, { W, C }; outputInfo itself carries the GatherNd shape.
+    armnn::TensorInfo flatOutputInfo = outputInfo;
+    flatOutputInfo.SetShape({ dimW, dimC });
+
+    const arm_compute::TensorInfo aclParams  = BuildArmComputeTensorInfo(flatParamsInfo);
+    const arm_compute::TensorInfo aclIndices = BuildArmComputeTensorInfo(flatIndicesInfo);
+    const arm_compute::TensorInfo aclOutput  = BuildArmComputeTensorInfo(flatOutputInfo);
+    return arm_compute::NEGather::validate(&aclParams, &aclIndices, &aclOutput, ComputeAclAxis(0, flatParamsInfo));
+}
+
+NeonGatherNdWorkload::NeonGatherNdWorkload(const GatherNdQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : NeonBaseWorkload<GatherNdQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonGatherNdWorkload", 2, 1); // presumably: 2 inputs ([0] params, [1] indices), 1 output
+
+ TensorInfo paramsInfo = info.m_InputTensorInfos[0];
+ TensorInfo indicesInfo = info.m_InputTensorInfos[1];
+ TensorInfo outputInfo = info.m_OutputTensorInfos[0];
+
+ arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ITensor& indices = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ // Calculate ND, K, W, C: the sizes used below to run GatherNd as Mul -> ReduceSum -> Gather -> Reshape.
+ std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
+
+ /// Calculate flattened indices: m_FlattenedIndices = sum over the ND axis of (indices * m_FlattenedCoeff).
+ /// This could be done using MatMul instead of multiplication followed by reduce sum operation,
+ /// but GeMM does not support s32 at the moment.
+
+ // Prepare m_FlattenedIndices, shape { W }, to store the output of the reduce_sum operation
+ armnn::TensorInfo flattenedIndices_Info = indicesInfo;
+ flattenedIndices_Info.SetShape({ keyIndices["W"] });
+ BuildArmComputeTensor(m_FlattenedIndices, flattenedIndices_Info);
+ armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedIndices);
+
+ // Reshape indices into { W, ND }. NOTE: this overwrites the shape held by the input tensor's own info object.
+ indices.info()->set_tensor_shape(BuildArmComputeTensorShape({ keyIndices["W"], keyIndices["ND"] }));
+
+ // Calculate the m_FlattenedCoeff: coeff[i] = product of paramsShape[i+1 .. ND-1], with coeff[ND-1] = 1
+ TensorShape paramsShape = paramsInfo.GetShape();
+ std::vector<unsigned int> flattenedCoeff(keyIndices["ND"], 1);
+ for (unsigned int i = 1; i < keyIndices["ND"]; ++i)
+ {
+ flattenedCoeff[i - 1] = paramsShape[i];
+ }
+ for (unsigned int i = keyIndices["ND"] - 1; i > 0; --i) // NOTE(review): unsigned wrap if ND == 0 - confirm ND >= 1
+ {
+ flattenedCoeff[i - 1] *= flattenedCoeff[i];
+ }
+ armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
+ flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
+ BuildArmComputeTensor(m_FlattenedCoeff, flattenedCoeff_Info);
+ armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedCoeff);
+ CopyArmComputeITensorData(flattenedCoeff.data(), m_FlattenedCoeff);
+
+ // Prepare m_outputMul, shape { W, ND }, to store the output of the multiplication
+ armnn::TensorInfo outputMul_Info = indicesInfo;
+ outputMul_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
+ BuildArmComputeTensor(m_outputMul, outputMul_Info);
+ armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_outputMul);
+
+ // Multiply: m_outputMul = indices * m_FlattenedCoeff. SATURATE if either input is quantized, otherwise WRAP.
+ auto convertPolicy = (IsQuantizedType(info.m_InputTensorInfos[0].GetDataType()) ||
+ IsQuantizedType(info.m_InputTensorInfos[1].GetDataType())) ?
+ arm_compute::ConvertPolicy::SATURATE :
+ arm_compute::ConvertPolicy::WRAP;
+
+ m_MulLayer.configure(&indices,
+ &m_FlattenedCoeff,
+ &m_outputMul,
+ 1.0f,
+ convertPolicy,
+ arm_compute::RoundingPolicy::TO_ZERO,
+ arm_compute::ActivationLayerInfo());
+
+ // Reduce Sum: m_FlattenedIndices = SUM of m_outputMul along ArmNN axis 1, i.e. the ND axis of { W, ND }
+ const std::vector<unsigned int> armnnReduceAxes(1, 1);
+ arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(m_outputMul.info()->num_dimensions(),
+ outputMul_Info.GetNumDimensions(),
+ armnnReduceAxes);
+ m_ReduceSumLayer.configure(&m_outputMul,
+ &m_FlattenedIndices,
+ static_cast<unsigned int>(coords[0]),
+ arm_compute::ReductionOperation::SUM,
+ false);
+
+ /// Call Gather with adequate shapes
+ // Reshape params into { K, C } (this also overwrites the shape held by the input tensor's own info object)
+ paramsInfo.SetShape({ keyIndices["K"], keyIndices["C"] });
+ input.info()->set_tensor_shape(BuildArmComputeTensorShape(paramsInfo.GetShape()));
+
+ // Prepare m_outputGather with the shape given by gather { W, C }
+ // (the original outputInfo has the shape given by gatherNd)
+ armnn::TensorInfo outputGather_Info = outputInfo;
+ outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
+ BuildArmComputeTensor(m_outputGather, outputGather_Info);
+ armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_outputGather);
+
+ m_GatherLayer.configure(&input, &m_FlattenedIndices, &m_outputGather, ComputeAclAxis(0, paramsInfo));
+
+ // Reshape the { W, C } gather result into the workload output, which keeps the original GatherNd shape
+ m_ReshapeLayer.configure(&m_outputGather, &output);
+}
+
+void NeonGatherNdWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonGatherNdWorkload_Execute", this->GetGuid());
+ m_MulLayer.run(); // m_outputMul = indices * m_FlattenedCoeff
+ m_ReduceSumLayer.run(); // m_FlattenedIndices = m_outputMul summed over the ND axis
+ m_GatherLayer.run(); // m_outputGather = Gather of the { K, C } params using m_FlattenedIndices
+ m_ReshapeLayer.run(); // copies m_outputGather into the output tensor with the GatherNd shape
+}
+} //namespace armnn \ No newline at end of file
diff --git a/src/backends/neon/workloads/NeonGatherNdWorkload.hpp b/src/backends/neon/workloads/NeonGatherNdWorkload.hpp
new file mode 100644
index 0000000000..848aac667b
--- /dev/null
+++ b/src/backends/neon/workloads/NeonGatherNdWorkload.hpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "NeonBaseWorkload.hpp"
+
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/NEON/functions/NEGather.h"
+#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+
+namespace armnn
+{
+arm_compute::Status NeonGatherNdWorkloadValidate(const TensorInfo& input, // params tensor info (paramInfo in the .cpp)
+ const TensorInfo& indices, // indices tensor info
+ const TensorInfo& output); // output tensor info, carrying the GatherNd result shape
+
+class NeonGatherNdWorkload : public NeonBaseWorkload<GatherNdQueueDescriptor> // GatherNd built from four ACL functions
+{
+public:
+ NeonGatherNdWorkload(const GatherNdQueueDescriptor& descriptor, const WorkloadInfo& info); // configures all stages
+ virtual void Execute() const override; // runs Mul, ReduceSum, Gather, Reshape in that order
+
+private:
+ arm_compute::Tensor m_FlattenedCoeff; // shape { ND }: per-dimension multipliers applied to the indices
+ arm_compute::Tensor m_outputMul; // shape { W, ND }: indices * m_FlattenedCoeff
+ arm_compute::Tensor m_FlattenedIndices; // shape { W }: m_outputMul summed over the ND axis
+ arm_compute::Tensor m_outputGather; // shape { W, C }: Gather result before the final reshape
+
+ mutable arm_compute::NEPixelWiseMultiplication m_MulLayer; // mutable: run() is invoked from the const Execute()
+ mutable arm_compute::NEReductionOperation m_ReduceSumLayer; // mutable for the same reason
+ mutable arm_compute::NEGather m_GatherLayer; // mutable for the same reason
+ mutable arm_compute::NEReshapeLayer m_ReshapeLayer; // mutable for the same reason
+
+};
+
+} //namespace armnn \ No newline at end of file
diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp
index 8b99f03a7f..024f1ca983 100644
--- a/src/backends/neon/workloads/NeonWorkloads.hpp
+++ b/src/backends/neon/workloads/NeonWorkloads.hpp
@@ -31,6 +31,7 @@
#include "NeonFloorFloatWorkload.hpp"
#include "NeonFullyConnectedWorkload.hpp"
#include "NeonGatherWorkload.hpp"
+#include "NeonGatherNdWorkload.hpp"
#include "NeonInstanceNormalizationWorkload.hpp"
#include "NeonL2NormalizationFloatWorkload.hpp"
#include "NeonLogWorkload.hpp"