src/backends/neon/workloads/NeonGatherNdWorkload.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonGatherNdWorkload.hpp"
#include "NeonWorkloadUtils.hpp"
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include "backendsCommon/WorkloadUtils.hpp"

namespace armnn
{
arm_compute::Status NeonGatherNdWorkloadValidate(const TensorInfo& paramInfo,
                                                 const TensorInfo& indicesInfo,
                                                 const TensorInfo& outputInfo)
{
    // Calculate ND, K, W, C.
    std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramInfo, indicesInfo);

    /// Call Gather with adequate shapes
    // Reshape params into { K, C }
    armnn::TensorInfo params_K_C_Info =  paramInfo;
    params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });

    // Reshape indices into { W }
    armnn::TensorInfo indices_W_Info = indicesInfo;
    indices_W_Info.SetShape({ keyIndices["W"] });

    // Reshape output to have the shape given by gather { W, C }
    // (the original outputInfo has the shape given by gatherNd)
    armnn::TensorInfo outputGather_Info = outputInfo;
    outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });

    const arm_compute::TensorInfo aclParamsInfo  = BuildArmComputeTensorInfo(params_K_C_Info);
    const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_Info);
    const arm_compute::TensorInfo aclOutputInfo  = BuildArmComputeTensorInfo(outputGather_Info);

    auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
    return arm_compute::NEGather::validate(&aclParamsInfo, &aclIndicesInfo, &aclOutputInfo, aclAxis);
}

NeonGatherNdWorkload::NeonGatherNdWorkload(const GatherNdQueueDescriptor& descriptor,
                                           const WorkloadInfo& info)
        : NeonBaseWorkload<GatherNdQueueDescriptor>(descriptor, info)
{
    m_Data.ValidateInputsOutputs("NeonGatherNdWorkload", 2, 1);

    TensorInfo paramsInfo  = info.m_InputTensorInfos[0];
    TensorInfo indicesInfo = info.m_InputTensorInfos[1];
    TensorInfo outputInfo  = info.m_OutputTensorInfos[0];

    arm_compute::ITensor& input   = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ITensor& indices = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
    arm_compute::ITensor& output  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

    // Calculate ND, K, W, C.
    std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);

    /// Calculate flattened indices: m_FlattenedIndices = indices * m_FlattenedCoeff.
    /// This could be done using MatMul instead of multiplication followed by reduce sum operation,
    /// but GeMM does not support s32 at the moment.

    // Prepare the tensor to store the output of the reduce_sum operation
    armnn::TensorInfo flattenedIndices_Info = indicesInfo;
    flattenedIndices_Info.SetShape({ keyIndices["W"] });
    BuildArmComputeTensor(m_FlattenedIndices, flattenedIndices_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedIndices);

    // Reshape indices into { W, ND }
    indices.info()->set_tensor_shape(BuildArmComputeTensorShape({ keyIndices["W"], keyIndices["ND"] }));

    // Calculate the m_FlattenedCoeff
    TensorShape paramsShape = paramsInfo.GetShape();
    std::vector<unsigned int> flattenedCoeff(keyIndices["ND"], 1);
    for (unsigned int i = 1; i < keyIndices["ND"]; ++i)
    {
        flattenedCoeff[i - 1] = paramsShape[i];
    }
    for (unsigned int i = keyIndices["ND"] - 1; i > 0; --i)
    {
        flattenedCoeff[i - 1] *= flattenedCoeff[i];
    }
    armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
    flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
    BuildArmComputeTensor(m_FlattenedCoeff, flattenedCoeff_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedCoeff);
    CopyArmComputeITensorData(flattenedCoeff.data(), m_FlattenedCoeff);

    // Prepare the tensor to store the output of the multiplication
    armnn::TensorInfo outputMul_Info = indicesInfo;
    outputMul_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
    BuildArmComputeTensor(m_outputMul, outputMul_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_outputMul);

    // Multiply
    auto convertPolicy = (IsQuantizedType(info.m_InputTensorInfos[0].GetDataType()) ||
                          IsQuantizedType(info.m_InputTensorInfos[1].GetDataType())) ?
                          arm_compute::ConvertPolicy::SATURATE :
                          arm_compute::ConvertPolicy::WRAP;

    m_MulLayer.configure(&indices,
                         &m_FlattenedCoeff,
                         &m_outputMul,
                         1.0f,
                         convertPolicy,
                         arm_compute::RoundingPolicy::TO_ZERO,
                         arm_compute::ActivationLayerInfo());

    // Reduce Sum
    const std::vector<unsigned int> armnnReduceAxes(1, 1);
    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(m_outputMul.info()->num_dimensions(),
                                                                          outputMul_Info.GetNumDimensions(),
                                                                          armnnReduceAxes);
    m_ReduceSumLayer.configure(&m_outputMul,
                               &m_FlattenedIndices,
                               static_cast<unsigned int>(coords[0]),
                               arm_compute::ReductionOperation::SUM,
                               false);

    /// Call Gather with adequate shapes
    // Reshape params into { K, C }
    paramsInfo.SetShape({ keyIndices["K"], keyIndices["C"] });
    input.info()->set_tensor_shape(BuildArmComputeTensorShape(paramsInfo.GetShape()));

    // Reshape output to have the shape given by gather { W, C }
    // (the original outputInfo has the shape given by gatherNd)
    armnn::TensorInfo outputGather_Info = outputInfo;
    outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
    BuildArmComputeTensor(m_outputGather, outputGather_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_outputGather);

    m_GatherLayer.configure(&input, &m_FlattenedIndices, &m_outputGather, ComputeAclAxis(0, paramsInfo));

    // Reshape output to the original output shape
    m_ReshapeLayer.configure(&m_outputGather, &output);
}

void NeonGatherNdWorkload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonGatherNdWorkload_Execute", this->GetGuid());
    m_MulLayer.run();
    m_ReduceSumLayer.run();
    m_GatherLayer.run();
    m_ReshapeLayer.run();
}
} //namespace armnn