From 4452baf3d295164877c5810a3867b1d2d79b04f3 Mon Sep 17 00:00:00 2001
From: Cathal Corbett
Date: Fri, 13 May 2022 09:55:59 +0100
Subject: IVGCVSW-6260 ConstTensorsAsInput: Fully Connected Cl and Neon support.

 * IVGCVSW-6940 ConstTensorsAsInput: DepthwiseConvolution2d - Complete Neon and Cl Bug Fix
 * Bug fix to enable Cl and Neon Backend Compatibility ConstantTensorsAsInputs
 * Updated Cl and Neon FullyConnected workloads to handle constant weights and bias as inputs rather than reading from member variables.
 * Prevent non const weights and biases passing CL and NEON validate for Depthwise Convolution.

Signed-off-by: Cathal Corbett
Change-Id: I0f505ff5998a183152f843d0f6cc74327ba920e7
---
 src/backends/aclCommon/ArmComputeTensorUtils.cpp   |  4 ++
 .../backendsCommon/test/CompatibilityTests.cpp     |  5 +-
 src/backends/cl/ClBackend.cpp                      |  9 ++--
 src/backends/cl/ClBackend.hpp                      |  2 +-
 .../workloads/ClDepthwiseConvolutionWorkload.cpp   | 19 ++++++-
 .../cl/workloads/ClFullyConnectedWorkload.cpp      | 61 ++++++++++-----------
 .../cl/workloads/ClFullyConnectedWorkload.hpp      |  5 --
 src/backends/neon/NeonBackend.cpp                  | 10 ++--
 src/backends/neon/NeonBackend.hpp                  |  2 +-
 src/backends/neon/test/NeonLayerTests.cpp          |  6 +--
 .../workloads/NeonDepthwiseConvolutionWorkload.cpp | 19 ++++++-
 .../neon/workloads/NeonFullyConnectedWorkload.cpp  | 62 +++++++++-------------
 .../neon/workloads/NeonFullyConnectedWorkload.hpp  |  2 -
 13 files changed, 113 insertions(+), 93 deletions(-)

diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
index 1960332ccf..01c071d43b 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
@@ -147,6 +147,10 @@ arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& te
 
 // Utility function used to build a TensorInfo object, that can be used to initialise
 // ARM Compute Tensor and CLTensor allocators.
+// Note: this utility ignores the value of armnn::TensorInfo.IsConstant(). ACL tensors
+// default to constant but Arm NN ones default to non constant. In the cases where
+// we expect ACL to treat a tensor as constant that value must be set after this
+// utility has been called.
 arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo)
 {
     bool multiScales = tensorInfo.HasMultipleQuantizationScales();
diff --git a/src/backends/backendsCommon/test/CompatibilityTests.cpp b/src/backends/backendsCommon/test/CompatibilityTests.cpp
index 3685f75986..c69a4b5f91 100644
--- a/src/backends/backendsCommon/test/CompatibilityTests.cpp
+++ b/src/backends/backendsCommon/test/CompatibilityTests.cpp
@@ -131,7 +131,6 @@ TEST_SUITE("BackendCapability")
 
 namespace
 {
-
 #if defined(ARMNNREF_ENABLED) || defined(ARMCOMPUTENEON_ENABLED) || defined(ARMCOMPUTECL_ENABLED)
 void CapabilityTestHelper(BackendCapabilities &capabilities,
                           std::vector<std::pair<std::string, bool>> capabilityVector)
@@ -198,7 +197,7 @@ TEST_CASE ("Neon_Backends_Capability_Test")
                          {{"NonConstWeights", false},
                           {"AsyncExecution", false},
                           {"ProtectedContentAllocation", false},
-                          {"ConstantTensorsAsInputs", false},
+                          {"ConstantTensorsAsInputs", true},
                           {"PreImportIOTensors", false},
                           {"ExternallyManagedMemory", true},
                           {"MultiAxisPacking", false}});
@@ -217,7 +216,7 @@ TEST_CASE ("Cl_Backends_Capability_Test")
                          {{"NonConstWeights", false},
                           {"AsyncExecution", false},
                           {"ProtectedContentAllocation", true},
-                          {"ConstantTensorsAsInputs", false},
+                          {"ConstantTensorsAsInputs", true},
                           {"PreImportIOTensors", false},
                           {"ExternallyManagedMemory", true},
                           {"MultiAxisPacking", false}});
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index ed6f221511..47990d87dc 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -398,18 +398,19 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
         else if (base.GetType() == LayerType::FullyConnected)
         {
             FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
+            FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
 
+            // As bias is optional only try to get TensorInfo from input if bias is enabled.
             Optional<TensorInfo> biases;
-
-            if (baseLayer->GetParameters().m_BiasEnabled)
+            if (descriptor.m_BiasEnabled)
             {
-                biases = baseLayer->m_Bias->GetTensorInfo();
+                biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
             }
 
             arm_compute::Status status = ClFullyConnectedWorkloadValidate(
                 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
-                baseLayer->m_Weight->GetTensorInfo(),
+                baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                 biases,
                 baseLayer->GetParameters(),
                 &activationDesc);
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index 99fe9069ff..ce56c3025c 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -27,7 +27,7 @@ const BackendCapabilities gpuAccCapabilities("GpuAcc",
                                              {"NonConstWeights", false},
                                              {"AsyncExecution", false},
                                              {"ProtectedContentAllocation", true},
-                                             {"ConstantTensorsAsInputs", false},
+                                             {"ConstantTensorsAsInputs", true},
                                              {"PreImportIOTensors", false},
                                              {"ExternallyManagedMemory", true},
                                              {"MultiAxisPacking", false},
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 9a4cad3ef0..3a972d3f39 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -30,6 +30,15 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
                                                            const Optional<TensorInfo>& biases,
                                                            const ActivationDescriptor* activationDescriptor)
 {
+    // The CL implemented workload does support both const and non const
+    // weights. However, in the case of non const weights we'd have to call
+    // prepare or configure for each inference which we're not setup to do just yet.
+    if (!weights.IsConstant())
+    {
+        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "ArmNN ClDepthwiseConv2dWorkload does not support non constant weights."};
+    }
+
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
 
@@ -47,14 +56,22 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp
     std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
 
     // Convert the weights into the compute library format
-    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+    aclWeightsInfo.set_are_values_constant(weights.IsConstant());
 
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
 
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
+        // Same for bias as weights. We don't currently support non const.
+        if (!biases.value().IsConstant())
+        {
+            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                       "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
+        }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+        aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index 017f4fff6b..c2da5f297a 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -23,22 +23,37 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
                                                      const FullyConnectedDescriptor& descriptor,
                                                      const ActivationDescriptor* activationDescriptor)
 {
+    // The CL implemented workload does support both const and non const
+    // weights. However, in the case of non const weights we'd have to call
+    // prepare or configure for each inference which we're not setup to do just yet.
+    if (!weights.IsConstant())
+    {
+        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "Arm NN ClFullyConnectedWorkload does not support non constant weights."};
+    }
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
-    const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+    arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+    aclWeights.set_are_values_constant(weights.IsConstant());
 
     arm_compute::TensorInfo aclBiases;
     arm_compute::TensorInfo* optionalAclBiases = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
+        // Same for bias as weights. We don't currently support non const.
+        if (!biases.value().IsConstant())
+        {
+            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                       "Arm NN ClFullyConnectedWorkload does not support non constant bias."};
+        }
         aclBiases = BuildArmComputeTensorInfo(biases.value());
+        aclBiases.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiases = &aclBiases;
     }
 
     const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
         ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
-
     return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
                                                         &aclWeights,
                                                         optionalAclBiases,
@@ -70,46 +85,34 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
                              detailsInfo,
                              this->GetGuid());
 
-    m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
-    BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
+    m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", descriptor.m_Parameters.GetNumInputs(),
+                                 1);
 
+    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& weights = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+
+    arm_compute::ICLTensor* bias = nullptr;
     if (m_Data.m_Parameters.m_BiasEnabled)
     {
-        m_BiasesTensor = std::make_unique<arm_compute::CLTensor>();
-        BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+        bias = &PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
     }
 
-    m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1);
-
-    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
-
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
 
     arm_compute::FullyConnectedLayerInfo fc_info =
-        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters,
+                                                                    activationInfo);
 
     {
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFullyConnectedWorkload_configure");
         m_FullyConnectedLayer.configure(clCompileContext,
                                         &input,
-                                        m_WeightsTensor.get(),
-                                        m_BiasesTensor.get(),
+                                        &weights,
+                                        bias,
                                         &output,
                                         fc_info);
     }
-
-    InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
-
-    if (m_BiasesTensor)
-    {
-        InitializeArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias);
-    }
-
-    // Force Compute Library to perform the necessary copying and reshaping, after which
-    // delete all the input tensors that will no longer be needed
-    m_FullyConnectedLayer.prepare();
-    FreeUnusedTensors();
 }
 
 void ClFullyConnectedWorkload::Execute() const
@@ -118,10 +121,4 @@ void ClFullyConnectedWorkload::Execute() const
     RunClFunction(m_FullyConnectedLayer, CHECK_LOCATION());
 }
 
-void ClFullyConnectedWorkload::FreeUnusedTensors()
-{
-    FreeTensorIfUnused(m_WeightsTensor);
-    FreeTensorIfUnused(m_BiasesTensor);
-}
-
 } //namespace armnn
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
index 3ab9f986a8..214c7fcb8b 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
@@ -35,11 +35,6 @@ public:
 
 private:
     mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer;
-
-    std::unique_ptr<arm_compute::CLTensor> m_WeightsTensor;
-    std::unique_ptr<arm_compute::CLTensor> m_BiasesTensor;
-
-    void FreeUnusedTensors();
 };
 
 } //namespace armnn
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 7a258c38e0..39ad4b9f32 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -250,17 +250,19 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
         else if (base.GetType() == LayerType::FullyConnected)
         {
             FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
-            Optional<TensorInfo> biases;
+            FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
 
-            if (baseLayer->GetParameters().m_BiasEnabled)
+            // As bias is optional only try to get TensorInfo from input if bias is enabled.
+            Optional<TensorInfo> biases;
+            if (descriptor.m_BiasEnabled)
             {
-                biases = baseLayer->m_Bias->GetTensorInfo();
+                biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
             }
 
             arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
                 baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
-                baseLayer->m_Weight->GetTensorInfo(),
+                baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                 biases,
                 baseLayer->GetParameters(),
                 &activationDesc);
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index e53bacb84a..e3e3782a7f 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -15,7 +15,7 @@ const BackendCapabilities cpuAccCapabilities("CpuAcc",
                                              {"NonConstWeights", false},
                                              {"AsyncExecution", false},
                                              {"ProtectedContentAllocation", false},
-                                             {"ConstantTensorsAsInputs", false},
+                                             {"ConstantTensorsAsInputs", true},
                                              {"PreImportIOTensors", false},
                                              {"ExternallyManagedMemory", true},
                                              {"MultiAxisPacking", false},
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index d71ce1f2bf..b813c33763 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -373,7 +373,7 @@ TEST_CASE("DepthwiseConv2dUtils")
     TensorInfo inputInfo({1, 1, 10, 10 }, dataType);
     TensorInfo outputInfo;
-    TensorInfo weightsInfo3x3({ 1, 3, 3, 1 }, dataType); // [1,H,W,I*M]
+    TensorInfo weightsInfo3x3({ 1, 3, 3, 1 }, dataType, 0, 0, true); // [1,H,W,I*M]
     TensorInfo biasesInfo;
 
     DepthwiseConvolution2dDescriptor descriptor;
@@ -432,14 +432,14 @@ TEST_CASE("DepthwiseConv2dUtils")
                                                        weightsInfo3x3, biasesInfo));
 
     // Supported weights shape 1x1
-    TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, DataType::Float32);
+    TensorInfo weightsInfo1x1({ 1, 1, 1, 1 }, DataType::Float32, 0, 0, true);
     descriptor = MakeDepthwiseConv2dDesc(1, 1);
     outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo1x1, descriptor, dataType);
     CHECK(layerSupport.IsDepthwiseConvolutionSupported(inputInfo, outputInfo, descriptor,
                                                        weightsInfo1x1, biasesInfo));
 
     // Supported shape 2x2
-    TensorInfo weightsInfo2x2({ 1, 2, 2, 1 }, DataType::Float32);
+    TensorInfo weightsInfo2x2({ 1, 2, 2, 1 }, DataType::Float32, 0, 0, true);
     descriptor = MakeDepthwiseConv2dDesc(1, 1);
     outputInfo = CreateOutputTensorInfo(inputInfo, weightsInfo2x2, descriptor, dataType);
     CHECK(layerSupport.IsDepthwiseConvolutionSupported(inputInfo, outputInfo, descriptor,
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index b122be62ce..9eeac6e2a3 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -33,6 +33,15 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
                                                              const Optional<TensorInfo>& biases,
                                                              const ActivationDescriptor* activationDescriptor)
 {
+    // The Neon implemented workload does support both const and non const
+    // weights. However, in the case of non const weights we'd have to call
+    // prepare or configure for each inference which we're not setup to do just yet.
+    if (!weights.IsConstant())
+    {
+        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "ArmNN NeonDepthwiseConv2dWorkload does not support non constant weights."};
+    }
+
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
 
@@ -50,14 +59,22 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
     std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
 
     // Convert the weights into the compute library format
-    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
+    aclWeightsInfo.set_are_values_constant(weights.IsConstant());
 
     arm_compute::TensorInfo aclBiasesInfo;
     arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
 
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
+        // Same for bias as weights. We don't currently support non const.
+        if (!biases.value().IsConstant())
+        {
+            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                       "ArmNN NeonDepthwiseConv2dWorkload does not support non constant bias."};
+        }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+        aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 26c68b7d1d..d3716806b3 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -28,22 +28,37 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
                                                        const FullyConnectedDescriptor& descriptor,
                                                        const ActivationDescriptor* activationDescriptor)
 {
+    // The NEON implemented workload does support both const and non const
+    // weights. However, in the case of non const weights we'd have to call
+    // prepare or configure for each inference which we're not setup to do just yet.
+    if (!weights.IsConstant())
+    {
+        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "Arm NN NeonFullyConnectedWorkload does not support non constant weights."};
+    }
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
-    const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+    arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+    aclWeights.set_are_values_constant(weights.IsConstant());
 
     arm_compute::TensorInfo aclBiases;
     arm_compute::TensorInfo* optionalAclBiases = nullptr;
     if (descriptor.m_BiasEnabled)
     {
         ARMNN_ASSERT(biases.has_value());
+        // Same for bias as weights. We don't currently support non const.
+        if (!biases.value().IsConstant())
+        {
+            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+                                       "Arm NN NeonFullyConnectedWorkload does not support non constant bias."};
+        }
         aclBiases = BuildArmComputeTensorInfo(biases.value());
+        aclBiases.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiases = &aclBiases;
     }
 
     const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
         ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
-
     return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
                                                         &aclWeights,
                                                         optionalAclBiases,
@@ -61,17 +76,20 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
     arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
+    // Copy the weights' tensor into arm_compute tensor.
     m_WeightsTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
-
+    InitializeArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight);
+
     if (m_Data.m_Parameters.m_BiasEnabled)
     {
+        // Copy the biases tensor into arm_compute tensor.
         m_BiasesTensor = std::make_unique<arm_compute::Tensor>();
-        BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+        BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+        InitializeArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias);
     }
 
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
-
     arm_compute::FullyConnectedLayerInfo fc_info =
         ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
 
@@ -79,28 +97,6 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
     layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
     m_FullyConnectedLayer.reset(layer.release());
 
-    // Allocate
-    if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QAsymmU8)
-    {
-        InitializeArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight);
-    }
-    else
-    {
-        InitializeArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight);
-    }
-
-    if (m_BiasesTensor)
-    {
-        if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32)
-        {
-            InitializeArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias);
-        }
-        else
-        {
-            InitializeArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias);
-        }
-    }
-
     // Add details for profiling output
     WorkloadInfo detailsInfo;
 
@@ -118,10 +114,10 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
                              detailsInfo,
                              this->GetGuid());
 
-    // Force Compute Library to perform the necessary copying and reshaping, after which
-    // delete all the input tensors that will no longer be needed
+    // Force Compute Library to perform the necessary copying and reshaping.
     m_FullyConnectedLayer->prepare();
-    FreeUnusedTensors();
+    FreeTensorIfUnused(m_WeightsTensor);
+    FreeTensorIfUnused(m_BiasesTensor);
 }
 
 void NeonFullyConnectedWorkload::Execute() const
@@ -130,10 +126,4 @@ void NeonFullyConnectedWorkload::Execute() const
     m_FullyConnectedLayer->run();
 }
 
-void NeonFullyConnectedWorkload::FreeUnusedTensors()
-{
-    FreeTensorIfUnused(m_WeightsTensor);
-    FreeTensorIfUnused(m_BiasesTensor);
-}
-
 } //namespace armnn
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
index 419a3299f2..944731d7bd 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
@@ -33,11 +33,9 @@ public:
 
 private:
     std::unique_ptr<arm_compute::IFunction> m_FullyConnectedLayer;
-
    std::unique_ptr<arm_compute::Tensor> m_WeightsTensor;
    std::unique_ptr<arm_compute::Tensor> m_BiasesTensor;
 
-    void FreeUnusedTensors();
 };
 
 } //namespace armnn
-- 
cgit v1.2.1
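
The pattern this patch repeats in every Cl and Neon validate function above can be condensed into a short sketch. The code below is illustrative only and not part of the commit: CheckConstTensorsSupported is a hypothetical helper name, while armnn::TensorInfo::IsConstant(), armcomputetensorutils::BuildArmComputeTensorInfo() and arm_compute::TensorInfo::set_are_values_constant() are the real APIs the diff itself uses.

// Illustrative sketch, not part of the commit: a hypothetical helper showing
// the guard-then-mark pattern the patch adds to each workload's validate path.
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <arm_compute/core/Error.h>
#include <arm_compute/core/TensorInfo.h>
#include <armnn/Optional.hpp>
#include <armnn/Tensor.hpp>

arm_compute::Status CheckConstTensorsSupported(const armnn::TensorInfo& weights,
                                               const armnn::Optional<armnn::TensorInfo>& biases)
{
    // Guard: ACL itself can handle non constant weights, but Arm NN would have
    // to re-run prepare()/configure() on every inference, which it is not set
    // up to do, so validation fails early and the layer is not assigned to
    // this backend.
    if (!weights.IsConstant())
    {
        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                   "Workload does not support non constant weights."};
    }
    if (biases.has_value() && !biases.value().IsConstant())
    {
        return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                   "Workload does not support non constant bias."};
    }

    // Mark: BuildArmComputeTensorInfo() ignores armnn::TensorInfo::IsConstant()
    // (see the comment added to ArmComputeTensorUtils.cpp), so constness must
    // be restated on the ACL tensor info before it is passed to the ACL
    // validate()/configure() calls.
    arm_compute::TensorInfo aclWeights =
        armcomputetensorutils::BuildArmComputeTensorInfo(weights);
    aclWeights.set_are_values_constant(weights.IsConstant());
    // aclWeights would now be handed to e.g. CLFullyConnectedLayer::validate().

    return arm_compute::Status{};
}

The explicit set_are_values_constant() call is needed because ACL tensor infos default to constant while Arm NN tensor infos default to non constant; without restating the flag after conversion, the two sides can disagree about whether the weights may change between inferences.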