patch/23.02/classarmnn_1_1_cl_gather_nd_workload.xhtml

         : ClBaseWorkload<GatherNdQueueDescriptor>(descriptor, info)
 {
     m_Data.ValidateInputsOutputs("ClGatherNdWorkload", 2, 1);

     TensorInfo paramsInfo  = info.m_InputTensorInfos[0];
     TensorInfo indicesInfo = info.m_InputTensorInfos[1];
     TensorInfo outputInfo  = info.m_OutputTensorInfos[0];

     arm_compute::ICLTensor& input   = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& indices = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
     arm_compute::ICLTensor& output  = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

     // Calculate ND, K, W, C.
     std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);

     /// Calculate flattened indices: m_FlattenedIndices = indices * m_FlattenedCoeff.
     /// This could be done using MatMul instead of multiplication followed by reduce sum operation,
     /// but GeMM does not support s32 at the moment.

     // Prepare the tensor to store the output of the reduce_sum operation
     armnn::TensorInfo flattenedIndices_Info = indicesInfo;
     flattenedIndices_Info.SetShape({ keyIndices["W"] });
     BuildArmComputeTensor(m_FlattenedIndices, flattenedIndices_Info);
     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedIndices);

     // Reshape indices into { W, ND }
     indices.info()->set_tensor_shape(BuildArmComputeTensorShape({ keyIndices["W"], keyIndices["ND"] }));

     // Calculate the m_FlattenedCoeff
     TensorShape paramsShape = paramsInfo.GetShape();
     std::vector<int32_t> flattenedCoeff(keyIndices["ND"], 1);
     for (unsigned int i = 1; i < keyIndices["ND"]; ++i)
     {
         flattenedCoeff[i - 1] = static_cast<int32_t>(paramsShape[i]);
     }
     for (unsigned int i = keyIndices["ND"] - 1; i > 0; --i)
     {
         flattenedCoeff[i - 1] *= flattenedCoeff[i];
     }
     armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
     flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
     BuildArmComputeTensor(m_FlattenedCoeff, flattenedCoeff_Info);
     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedCoeff);
     ARMNN_ASSERT_MSG(indicesInfo.GetDataType() == DataType::Signed32,
                      "flattenedCoeff must be same data type as m_FlattenedCoeff");
     CopyArmComputeClTensorData<int32_t>(m_FlattenedCoeff, flattenedCoeff.data());

     // Prepare the tensor to store the output of the multiplication
     armnn::TensorInfo outputMul_Info = indicesInfo;
     outputMul_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
     BuildArmComputeTensor(m_OutputMul, outputMul_Info);
     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputMul);

     // Multiply
     m_MulLayer.configure(clCompileContext,
                          &indices,
                          &m_FlattenedCoeff,
                          &m_OutputMul,
                          1.0f,
                          arm_compute::ConvertPolicy::WRAP,
                          arm_compute::RoundingPolicy::TO_ZERO,
                          arm_compute::ActivationLayerInfo());

     // Reduce Sum
     const std::vector<unsigned int> armnnReduceAxes(1, 1);
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(m_OutputMul.info()->num_dimensions(),
                                                                           outputMul_Info.GetNumDimensions(),
                                                                           armnnReduceAxes);
     m_ReduceSumLayer.configure(clCompileContext,
                                &m_OutputMul,
                                &m_FlattenedIndices,
                                static_cast<unsigned int>(coords[0]),
                                arm_compute::ReductionOperation::SUM,
                                false);

     /// Call Gather with adequate shapes
     // Reshape params into { K, C }
     paramsInfo.SetShape({ keyIndices["K"], keyIndices["C"] });
     input.info()->set_tensor_shape(BuildArmComputeTensorShape(paramsInfo.GetShape()));

     // Reshape output to have the shape given by gather { W, C }
     // (the original outputInfo has the shape given by gatherNd)
     armnn::TensorInfo outputGather_Info = outputInfo;
     outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
     BuildArmComputeTensor(m_OutputGather, outputGather_Info);
     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputGather);
     {
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClGatherNdWorkload_configure");
         auto aclAxis = ComputeAclAxis(0, paramsInfo);
         m_GatherLayer.configure(clCompileContext, &input, &m_FlattenedIndices, &m_OutputGather, aclAxis);
     }

     // Reshape output to the original output shape
     m_ReshapeLayer.configure(clCompileContext, &m_OutputGather, &output);
 };
armnn::ComputeAclAxis
int ComputeAclAxis(const int &armnnAxis, const armnn::TensorInfo &tensor)
Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank)
Definition: ArmComputeUtils.hpp:264

-armnn::CalculateGatherNdKeyIndices
std::map< std::string, unsigned int > CalculateGatherNdKeyIndices(TensorInfo inputInfo0, TensorInfo inputInfo1)
Calculates the key index values needed for GatherNd: N, ND, K, W, C (N is always 1) ...
Definition: WorkloadUtils.cpp:300

-armnn::TensorInfo
Definition: Tensor.hpp:152

-armnn::DataType::Signed32

-armnn::Coordinates
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
Definition: InternalTypes.hpp:15

-armnn::QueueDescriptor::ValidateInputsOutputs
void ValidateInputsOutputs(const std::string &descName, unsigned int numExpectedIn, unsigned int numExpectedOut) const
Definition: WorkloadData.cpp:475

-ARMNN_SCOPED_PROFILING_EVENT
#define ARMNN_SCOPED_PROFILING_EVENT(backendId, name)
Definition: Profiling.hpp:220

-armnn::TensorInfo::SetShape
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:193

-ARMNN_ASSERT_MSG
#define ARMNN_ASSERT_MSG(COND, MSG)
Definition: Assert.hpp:15

-armnn::BaseWorkload< GatherNdQueueDescriptor >::m_Data
GatherNdQueueDescriptor m_Data
Definition: Workload.hpp:83

-armnn::Compute::Undefined

-armnn::QueueDescriptor::m_Outputs
std::vector< ITensorHandle * > m_Outputs
Definition: WorkloadData.hpp:27

-armnn::BoostLogSeverityMapping::info

-armnn::QueueDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkloadData.hpp:26

+         : ClBaseWorkload<GatherNdQueueDescriptor>(descriptor, info)

+ {

+     m_Data.ValidateInputsOutputs("ClGatherNdWorkload", 2, 1);

+

+     TensorInfo paramsInfo  = info.m_InputTensorInfos[0];

+     TensorInfo indicesInfo = info.m_InputTensorInfos[1];

+     TensorInfo outputInfo  = info.m_OutputTensorInfos[0];

+

+     arm_compute::ICLTensor& input   = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();

+     arm_compute::ICLTensor& indices = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();

+     arm_compute::ICLTensor& output  = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

+

+     // Calculate ND, K, W, C.

+     std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);

+

+     /// Calculate flattened indices: m_FlattenedIndices = indices * m_FlattenedCoeff.

+     /// This could be done using MatMul instead of multiplication followed by reduce sum operation,

+     /// but GeMM does not support s32 at the moment.

+

+     // Prepare the tensor to store the output of the reduce_sum operation

+     armnn::TensorInfo flattenedIndices_Info = indicesInfo;

+     flattenedIndices_Info.SetShape({ keyIndices["W"] });

+     BuildArmComputeTensor(m_FlattenedIndices, flattenedIndices_Info);

+     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedIndices);

+

+     // Reshape indices into { W, ND }

+     indices.info()->set_tensor_shape(BuildArmComputeTensorShape({ keyIndices["W"], keyIndices["ND"] }));

+

+     // Calculate the m_FlattenedCoeff

+     TensorShape paramsShape = paramsInfo.GetShape();

+     std::vector<int32_t> flattenedCoeff(keyIndices["ND"], 1);

+     for (unsigned int i = 1; i < keyIndices["ND"]; ++i)

+     {

+         flattenedCoeff[i - 1] = static_cast<int32_t>(paramsShape[i]);

+     }

+     for (unsigned int i = keyIndices["ND"] - 1; i > 0; --i)

+     {

+         flattenedCoeff[i - 1] *= flattenedCoeff[i];

+     }

+     armnn::TensorInfo flattenedCoeff_Info = indicesInfo;

+     flattenedCoeff_Info.SetShape({ keyIndices["ND"] });

+     BuildArmComputeTensor(m_FlattenedCoeff, flattenedCoeff_Info);

+     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedCoeff);

+     ARMNN_ASSERT_MSG(indicesInfo.GetDataType() == DataType::Signed32,

+                      "flattenedCoeff must be same data type as m_FlattenedCoeff");

+     CopyArmComputeClTensorData<int32_t>(m_FlattenedCoeff, flattenedCoeff.data());

+

+     // Prepare the tensor to store the output of the multiplication

+     armnn::TensorInfo outputMul_Info = indicesInfo;

+     outputMul_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });

+     BuildArmComputeTensor(m_OutputMul, outputMul_Info);

+     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputMul);

+

+     // Multiply

+     m_MulLayer.configure(clCompileContext,

+                          &indices,

+                          &m_FlattenedCoeff,

+                          &m_OutputMul,

+                          1.0f,

+                          arm_compute::ConvertPolicy::WRAP,

+                          arm_compute::RoundingPolicy::TO_ZERO,

+                          arm_compute::ActivationLayerInfo());

+

+     // Reduce Sum

+     const std::vector<unsigned int> armnnReduceAxes(1, 1);

+     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(m_OutputMul.info()->num_dimensions(),

+                                                                           outputMul_Info.GetNumDimensions(),

+                                                                           armnnReduceAxes);

+     m_ReduceSumLayer.configure(clCompileContext,

+                                &m_OutputMul,

+                                &m_FlattenedIndices,

+                                static_cast<unsigned int>(coords[0]),

+                                arm_compute::ReductionOperation::SUM,

+                                false);

+

+     /// Call Gather with adequate shapes

+     // Reshape params into { K, C }

+     paramsInfo.SetShape({ keyIndices["K"], keyIndices["C"] });

+     input.info()->set_tensor_shape(BuildArmComputeTensorShape(paramsInfo.GetShape()));

+

+     // Reshape output to have the shape given by gather { W, C }

+     // (the original outputInfo has the shape given by gatherNd)

+     armnn::TensorInfo outputGather_Info = outputInfo;

+     outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });

+     BuildArmComputeTensor(m_OutputGather, outputGather_Info);

+     armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputGather);

+     {

+         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClGatherNdWorkload_configure");

+         auto aclAxis = ComputeAclAxis(0, paramsInfo);

+         m_GatherLayer.configure(clCompileContext, &input, &m_FlattenedIndices, &m_OutputGather, aclAxis);

+     }

+

+     // Reshape output to the original output shape

+     m_ReshapeLayer.configure(clCompileContext, &m_OutputGather, &output);

+ };


+References armnn::CalculateGatherNdKeyIndices(), armnn::info, BaseWorkload< GatherNdQueueDescriptor >::m_Data, QueueDescriptor::m_Inputs, QueueDescriptor::m_Outputs, TensorInfo::SetShape(), and QueueDescriptor::ValidateInputsOutputs().
+
Public Member Functions
	ClGatherNdWorkload (const GatherNdQueueDescriptor &descriptor, const WorkloadInfo &info, const arm_compute::CLCompileContext &clCompileContext)
	ClGatherNdWorkload (const GatherNdQueueDescriptor &descriptor, const WorkloadInfo &info, const arm_compute::CLCompileContext &clCompileContext)

virtual void	Execute () const override

Public Member Functions inherited from IWorkload
virtual	~IWorkload ()

virtual arm::pipe::ProfilingGuid	GetGuid () const =0

virtual bool	SupportsTensorHandleReplacement () const =0

virtual void	RegisterDebugCallback (const DebugCallbackFunction &)

virtual armnn::Optional< armnn::MemoryRequirements >	GetMemoryRequirements ()