From 81beae3a870004795275e9266bc43d845b9f78db Mon Sep 17 00:00:00 2001 From: Matthew Sloyan Date: Tue, 13 Jul 2021 19:46:11 +0100 Subject: IVGCVSW-6119 ConstTensorsAsInput: FullyConnected * Constant weights and biases are now stored as Constant layers. * Updated Serializer, Deserializer and unit tests to reflect this. * Updated TfLiteDelegate, TfLiteParser and OnnxParser. * Updated Schema with IsConstant and ConstantTensorsAsInputs. * Updated Ref backend to handle constant weights and bias as inputs rather than reading from member variables. * Added dynamic or constant input EndToEnd tests. !android-nn-driver:5959 Signed-off-by: Matthew Sloyan Change-Id: Ibf3cf437df1100e4b322b0d303c575c6339f9696 --- delegate/src/DelegateUtils.hpp | 5 + delegate/src/FullyConnected.hpp | 43 ++-- delegate/src/test/FullyConnectedTest.cpp | 10 +- include/armnn/Descriptors.hpp | 2 +- include/armnn/ILayerVisitor.hpp | 11 + include/armnn/INetwork.hpp | 20 +- include/armnn/LayerVisitorBase.hpp | 4 + src/armnn/BackendHelper.cpp | 47 ++++- src/armnn/Descriptors.cpp | 16 +- src/armnn/Network.cpp | 128 +++++------ src/armnn/Network.hpp | 16 +- src/armnn/layers/FullyConnectedLayer.cpp | 79 +------ src/armnn/layers/FullyConnectedLayer.hpp | 2 + src/armnn/test/ConstTensorLayerVisitor.cpp | 52 ++++- src/armnn/test/ConstTensorLayerVisitor.hpp | 10 - src/armnn/test/CreateWorkload.hpp | 85 ++++++-- src/armnn/test/GraphTests.cpp | 8 +- src/armnn/test/NetworkTests.cpp | 36 +++- src/armnn/test/ShapeInferenceTests.cpp | 18 +- .../test/optimizations/FuseActivationTests.cpp | 185 ++++++++++++++-- src/armnnDeserializer/Deserializer.cpp | 75 ++++--- src/armnnDeserializer/Deserializer.hpp | 12 +- .../test/DeserializeFullyConnected.cpp | 233 ++++++++++++++++++++- src/armnnOnnxParser/OnnxParser.cpp | 51 ++++- src/armnnSerializer/ArmnnSchema.fbs | 4 +- src/armnnSerializer/ArmnnSchema_generated.h | 34 ++- src/armnnSerializer/Serializer.cpp | 26 +-- src/armnnSerializer/Serializer.hpp | 1 - 
src/armnnSerializer/test/SerializerTests.cpp | 91 +++++++- src/armnnTfLiteParser/TfLiteParser.cpp | 80 +++---- src/backends/backendsCommon/WorkloadData.cpp | 41 +--- src/backends/backendsCommon/WorkloadFactory.cpp | 30 +-- .../test/FullyConnectedEndToEndTestImpl.hpp | 170 ++++++++++++++- .../test/layerTests/FullyConnectedTestImpl.cpp | 112 +++------- src/backends/reference/RefBackend.hpp | 3 +- .../reference/test/RefCreateWorkloadTests.cpp | 18 ++ src/backends/reference/test/RefEndToEndTests.cpp | 12 +- .../workloads/RefFullyConnectedWorkload.cpp | 42 +--- 38 files changed, 1232 insertions(+), 580 deletions(-) diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp index b04baac36e..2d1651842a 100644 --- a/delegate/src/DelegateUtils.hpp +++ b/delegate/src/DelegateUtils.hpp @@ -482,6 +482,11 @@ armnn::ConstTensor CreateConstTensor(const TfLiteTensor* tfLiteTensor, "TfLiteArmnnDelegate: Not constant allocation type: " + std::to_string(tfLiteTensor->allocation_type)); } + if(tflite::IsConstantTensor(tfLiteTensor)) + { + tensorInfo.SetConstant(); + } + if (permutationVector.has_value() && permutationVector.value().GetSize() > 0 && permutationData != nullptr) { // Permute tensor info diff --git a/delegate/src/FullyConnected.hpp b/delegate/src/FullyConnected.hpp index e94304fb21..49686d6eaf 100644 --- a/delegate/src/FullyConnected.hpp +++ b/delegate/src/FullyConnected.hpp @@ -130,30 +130,39 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData, return isSupported ? kTfLiteOk : kTfLiteError; } - armnn::Optional optionalWeights = armnn::EmptyOptional(); - armnn::Optional optionalBiases = armnn::EmptyOptional(); - if(descriptor.m_ConstantWeights) + armnn::IConnectableLayer* layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor); + ARMNN_ASSERT(layer != nullptr); + + // Add a constant layer for weights and biases if inputs are constant. 
+ if (isConstantWeights) { auto weightsTensor = CreateConstTensor(&tfLiteWeightsTensor, weightsTensorInfo, armnn::Optional()); - optionalWeights = armnn::Optional(weightsTensor); - if (biasEnabled) + armnn::IConnectableLayer* weightsLayer = delegateData.m_Network->AddConstantLayer(weightsTensor); + + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u)); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsTensorInfo); + } + + if (biasEnabled) + { + const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; + if(tflite::IsConstantTensor(&tfLiteBiasTensor)) { - const TfLiteTensor& tfLiteBiasTensor = tfLiteTensors[tfLiteNode->inputs->data[2]]; auto biasTensor = CreateConstTensor(&tfLiteBiasTensor, biasTensorInfo, armnn::Optional()); - optionalBiases = armnn::Optional(biasTensor); + + armnn::IConnectableLayer* biasLayer = delegateData.m_Network->AddConstantLayer(biasTensor); + ARMNN_ASSERT(biasLayer != nullptr); + + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u)); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensorInfo); } } - armnn::IConnectableLayer* layer = delegateData.m_Network->AddFullyConnectedLayer(descriptor, - optionalWeights, - optionalBiases); - ARMNN_ASSERT(layer != nullptr); - armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); outputSlot.SetTensorInfo(outputTensorInfo); @@ -171,13 +180,15 @@ TfLiteStatus VisitFullyConnectedOperator(DelegateData& delegateData, // Connect delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(reshapeLayer->GetInputSlot(0)); reshapeLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); + if (!descriptor.m_ConstantWeights) { delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[1]]->Connect(layer->GetInputSlot(1)); - if (biasEnabled) - { - delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[2]]->Connect(layer->GetInputSlot(2)); - } + } + + if (biasEnabled && !tflite::IsConstantTensor(&tfLiteTensors[tfLiteNode->inputs->data[2]])) + { + 
delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[2]]->Connect(layer->GetInputSlot(2)); } delegateData.m_OutputSlotForNode[tfLiteNode->outputs->data[0]] = &outputSlot; } diff --git a/delegate/src/test/FullyConnectedTest.cpp b/delegate/src/test/FullyConnectedTest.cpp index 3bea250988..c300bc72bf 100644 --- a/delegate/src/test/FullyConnectedTest.cpp +++ b/delegate/src/test/FullyConnectedTest.cpp @@ -34,7 +34,7 @@ void FullyConnectedFp32Test(std::vector& backends, bool consta constantWeights); } -void FullyConnectedActicationTest(std::vector& backends, bool constantWeights = true) +void FullyConnectedActivationTest(std::vector& backends, bool constantWeights = true) { std::vector inputTensorShape { 1, 4, 1, 1 }; std::vector weightsTensorShape { 1, 4 }; @@ -106,7 +106,7 @@ TEST_CASE ("FullyConnected_Int8_GpuAcc_Test") TEST_CASE ("FullyConnected_Activation_GpuAcc_Test") { std::vector backends = { armnn::Compute::GpuAcc }; - FullyConnectedActicationTest(backends); + FullyConnectedActivationTest(backends); } } // End of TEST_SUITE("FullyConnected_GpuAccTests") @@ -129,7 +129,7 @@ TEST_CASE ("FullyConnected_Int8_CpuAcc_Test") TEST_CASE ("FullyConnected_Activation_CpuAcc_Test") { std::vector backends = { armnn::Compute::CpuAcc }; - FullyConnectedActicationTest(backends); + FullyConnectedActivationTest(backends); } } // End of TEST_SUITE("FullyConnected_CpuAccTests") @@ -152,7 +152,7 @@ TEST_CASE ("FullyConnected_Int8_CpuRef_Test") TEST_CASE ("FullyConnected_Activation_CpuRef_Test") { std::vector backends = { armnn::Compute::CpuRef }; - FullyConnectedActicationTest(backends); + FullyConnectedActivationTest(backends); } TEST_CASE ("FullyConnected_Weights_As_Inputs_FP32_CpuRef_Test") @@ -170,7 +170,7 @@ TEST_CASE ("FullyConnected_Weights_As_Inputs_Int8_CpuRef_Test") TEST_CASE ("FullyConnected_Weights_As_Inputs_Activation_CpuRef_Test") { std::vector backends = { armnn::Compute::CpuRef }; - FullyConnectedActicationTest(backends, false); + 
FullyConnectedActivationTest(backends, false); } } // End of TEST_SUITE("FullyConnected_CpuRefTests") diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp index bcee902d75..7188a7bd3a 100644 --- a/include/armnn/Descriptors.hpp +++ b/include/armnn/Descriptors.hpp @@ -402,7 +402,7 @@ struct FullyConnectedDescriptor : BaseDescriptor } /// Get the number of views/inputs. - uint32_t GetNumViews() const; + uint32_t GetNumInputs() const; /// Enable/disable bias. bool m_BiasEnabled; diff --git a/include/armnn/ILayerVisitor.hpp b/include/armnn/ILayerVisitor.hpp index b5112a8f0b..f7c769fbe3 100644 --- a/include/armnn/ILayerVisitor.hpp +++ b/include/armnn/ILayerVisitor.hpp @@ -198,6 +198,16 @@ public: virtual void VisitFloorLayer(const IConnectableLayer* layer, const char* name = nullptr) = 0; + + /// Function that a fully connected layer should call back to when its Accept(ILayerVisitor&) + /// function is invoked. + /// @param layer - pointer to the layer which is calling back to this visit function. + /// @param fullyConnectedDescriptor - Description of the fully connected layer. + /// @param name - Optional name for the layer. + virtual void VisitFullyConnectedLayer(const IConnectableLayer* layer, + const FullyConnectedDescriptor& fullyConnectedDescriptor, + const char* name = nullptr) = 0; + /// Function that a fully connected layer should call back to when its Accept(ILayerVisitor&) /// function is invoked. /// @param layer - pointer to the layer which is calling back to this visit function. @@ -205,6 +215,7 @@ public: /// @param weights - Tensor for the weights data. /// @param biases - Optional tensor for the bias data. /// @param name - Optional name for the layer. 
+ ARMNN_DEPRECATED_MSG("Use VisitFullyConnectedLayer without ConstTensors") virtual void VisitFullyConnectedLayer(const IConnectableLayer* layer, const FullyConnectedDescriptor& fullyConnectedDescriptor, const ConstTensor& weights, diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp index 865d1291a9..48f407f2f9 100644 --- a/include/armnn/INetwork.hpp +++ b/include/armnn/INetwork.hpp @@ -301,37 +301,23 @@ public: IConnectableLayer* AddFillLayer(const FillDescriptor& fillDescriptor, const char* name = nullptr); - /// Adds a fully connected layer to the network. - /// @param fullyConnectedDescriptor - Description of the fully connected layer. - /// @param weights -Optional Tensor for the weights data. - /// @param biases - Optional tensor for the bias data. - /// @param name - Optional name for the layer. - /// @return - Interface for configuring the layer. - IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const Optional& weights, - const Optional& biases, - const char* name = nullptr); /// Adds a fully connected layer to the network. /// @param fullyConnectedDescriptor - Description of the fully connected layer. - /// @param weights - Tensor for the weights data. - /// @param biases - Optional tensor for the bias data. - /// @param name - Optional name for the layer. /// @return - Interface for configuring the layer. 
IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const Optional& biases, const char* name = nullptr); ARMNN_DEPRECATED_MSG("This AddFullyConnectedLayer overload is deprecated") IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, + const Optional& weights, + const Optional& biases, const char* name = nullptr); ARMNN_DEPRECATED_MSG("This AddFullyConnectedLayer overload is deprecated") IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, const ConstTensor& weights, - const ConstTensor& biases, + const Optional& biases, const char* name = nullptr); /// Adds a permute layer to the network. diff --git a/include/armnn/LayerVisitorBase.hpp b/include/armnn/LayerVisitorBase.hpp index fb88d559bb..d3378dccf5 100644 --- a/include/armnn/LayerVisitorBase.hpp +++ b/include/armnn/LayerVisitorBase.hpp @@ -108,6 +108,10 @@ public: void VisitFloorLayer(const IConnectableLayer*, const char*) override { DefaultPolicy::Apply(__func__); } + void VisitFullyConnectedLayer(const IConnectableLayer*, + const FullyConnectedDescriptor&, + const char*) override { DefaultPolicy::Apply(__func__); } + void VisitFullyConnectedLayer(const IConnectableLayer*, const FullyConnectedDescriptor&, const ConstTensor&, diff --git a/src/armnn/BackendHelper.cpp b/src/armnn/BackendHelper.cpp index 13bde0aafa..9ab30f8fb2 100644 --- a/src/armnn/BackendHelper.cpp +++ b/src/armnn/BackendHelper.cpp @@ -5,6 +5,7 @@ #include #include +#include #include @@ -399,22 +400,48 @@ bool LayerSupportHandle::IsFullyConnectedSupported(const TensorInfo& input, const FullyConnectedDescriptor& descriptor, Optional reasonIfUnsupported) { - if(!descriptor.m_ConstantWeights && !m_BackendId.IsUndefined()) + if(!m_BackendId.IsUndefined()) { - auto capability = GetCapability("NonConstWeights", m_BackendId); - if (capability.has_value() 
&& capability.value().GetValue().AsBool() == true) + auto capability = GetCapability("ConstantTensorsAsInputs", m_BackendId); + if(!capability.has_value() || capability.value().GetValue().AsBool() == false) { - return true; + if(!weights.IsConstant()) + { + return false; + } + if(descriptor.m_BiasEnabled) + { + if(!biases.IsConstant()) + { + return false; + } + } + + // At the first stage we will only print a warning. This is to give + // backend developers a chance to adopt and read weights from input slots. + ARMNN_LOG(warning) << "The backend makes use of a deprecated interface to read constant tensors. " + "If you are a backend developer please find more information in our " + "doxygen documentation on github https://github.com/ARM-software/armnn " + "under the keyword 'ConstTensorsAsInputs'."; + } + + if(!descriptor.m_ConstantWeights) + { + auto capability = GetCapability("NonConstWeights", m_BackendId); + if (capability.has_value() && capability.value().GetValue().AsBool() == true) + { + return true; + } + return false; } - return false; } return m_LayerSupport->IsFullyConnectedSupported(input, - output, - weights, - biases, - descriptor, - reasonIfUnsupported.value()); + output, + weights, + biases, + descriptor, + reasonIfUnsupported.value()); } bool LayerSupportHandle::IsGatherSupported(const TensorInfo& input0, diff --git a/src/armnn/Descriptors.cpp b/src/armnn/Descriptors.cpp index 706992ccb0..4521894c28 100644 --- a/src/armnn/Descriptors.cpp +++ b/src/armnn/Descriptors.cpp @@ -425,19 +425,13 @@ int StridedSliceDescriptor::GetStopForAxis(const TensorShape& inputShape, } -uint32_t FullyConnectedDescriptor::GetNumViews() const +uint32_t FullyConnectedDescriptor::GetNumInputs() const { - // Return 1 with constant weights, otherwise check if bias is enabled - uint32_t numInputs = 1; - if (!m_ConstantWeights) + // Return 2 (input and weights), or 3 if bias is enabled + unsigned int numInputs = 2; + if (m_BiasEnabled) { - // non-const weights - numInputs = 2; - if 
(m_BiasEnabled) - { - // non-const bias - numInputs = 3; - } + numInputs = 3; } return numInputs; } diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 83eafe7993..a29ce83c5a 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -30,6 +30,8 @@ #include +#include + #include #include #include @@ -178,38 +180,22 @@ IConnectableLayer* INetwork::AddFillLayer(const FillDescriptor& fillDescriptor, } IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const Optional& biases, const char* name) { - return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor, - armnn::Optional(weights), - biases, - name); + return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor, name); } IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, const ConstTensor& weights, + const Optional& biases, const char* name) { - armnn::Optional biases; return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor, armnn::Optional(weights), biases, name); } -IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const ConstTensor& biases, - const char* name) -{ - return pNetworkImpl->AddFullyConnectedLayer(fullyConnectedDescriptor, - armnn::Optional(weights), - armnn::Optional(biases), - name); -} - IConnectableLayer* INetwork::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, const Optional& weights, const Optional& biases, @@ -1799,69 +1785,87 @@ IConnectableLayer* NetworkImpl::AddFillLayer(const FillDescriptor& fillDescripto return m_Graph->AddLayer(fillDescriptor, name); } -IConnectableLayer* NetworkImpl::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const Optional& weights, - const Optional& biases, - const char* name) +IConnectableLayer* 
NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, + const char* name) +{ + return m_Graph->AddLayer(fullyConnectedDescriptor, name); +} + +IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, + const Optional& weights, + const Optional& biases, + const char* name) { - if (fullyConnectedDescriptor.m_ConstantWeights && !weights.has_value()) + ConstantLayer* weightsLayer = nullptr; + ConstantLayer* biasLayer = nullptr; + unsigned int numInputs = fullyConnectedDescriptor.GetNumInputs(); + + // Add a constant layer for weights + if (weights.has_value()) { - throw InvalidArgumentException("AddFullyConnectedLayer: weights cannot be empty"); + weightsLayer = m_Graph->AddLayer("Weights"); + weightsLayer->m_LayerOutput = std::make_shared(weights.value()); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsLayer->m_LayerOutput->GetTensorInfo()); + } + else if (fullyConnectedDescriptor.m_ConstantWeights) + { + throw InvalidArgumentException("AddFullyConnectedLayer: Constant weights tensor is empty."); + } - if (fullyConnectedDescriptor.m_BiasEnabled && !biases.has_value()) - { - throw InvalidArgumentException("AddFullyConnectedLayer: biases cannot be empty"); - } + // Add a constant layer for biases + if (biases.has_value() && fullyConnectedDescriptor.m_BiasEnabled) + { + biasLayer = m_Graph->AddLayer("Biases"); + biasLayer->m_LayerOutput = std::make_shared(biases.value()); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasLayer->m_LayerOutput->GetTensorInfo()); } - const auto layer = m_Graph->AddLayer(fullyConnectedDescriptor, name); + if (numInputs < 2) + { + throw InvalidArgumentException("AddFullyConnectedLayer: Requires at least 2 input tensors: Input, Weights"); + } + + auto layer = m_Graph->AddLayer(fullyConnectedDescriptor, name); + + if (weightsLayer) + { + // Connect weights layer + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + } - if 
(fullyConnectedDescriptor.m_ConstantWeights) + if ( fullyConnectedDescriptor.m_BiasEnabled && numInputs == 3 ) { - layer->m_Weight = std::make_shared(weights.value()); - if (fullyConnectedDescriptor.m_BiasEnabled) + if (biasLayer) { - layer->m_Bias = std::make_shared(biases.value()); + // Connect bias layer + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2)); } } + else if ( !fullyConnectedDescriptor.m_BiasEnabled && numInputs == 2 ) + { + // Bias is disabled + layer->m_Bias = nullptr; + } + else + { + throw InvalidArgumentException(fmt::format( + "AddFullyConnectedLayer: Value mismatch. When bias is enabled in the " + "descriptor the number of inputs is expected to be 3 otherwise 2. " + "BiasEnabled={}, numInputs={}", + fullyConnectedDescriptor.m_BiasEnabled, + numInputs)); + } return layer; } IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const Optional& weights, + const ConstTensor& weights, const Optional& biases, const char* name) -{ - return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, weights, biases, name); -} - -IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const Optional& biases, - const char* name) { Optional optionalWeights(weights); - return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, optionalWeights, biases, name); -} - -IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const char* name) -{ - Optional optionalWeights(weights); - Optional biases; - return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, optionalWeights, biases, name); -} - -IConnectableLayer* NetworkImpl::AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const ConstTensor& biases, - const char* name) -{ - Optional 
optionalWeights(weights); - Optional optionalBiases(biases); - return AddFullyConnectedLayerImpl(fullyConnectedDescriptor, optionalWeights, optionalBiases, name); + return AddFullyConnectedLayer(fullyConnectedDescriptor, optionalWeights, biases, name); } IConnectableLayer* NetworkImpl::AddConcatLayer(const ConcatDescriptor& concatDescriptor, diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp index 54c3497c90..c22c865e3b 100644 --- a/src/armnn/Network.hpp +++ b/src/armnn/Network.hpp @@ -133,24 +133,17 @@ public: IConnectableLayer* AddFloorLayer(const char* name = nullptr); IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const Optional& weights, - const Optional& biases, const char* name = nullptr); IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, + const Optional& weights, const Optional& biases, const char* name = nullptr); ARMNN_DEPRECATED_MSG("This AddFullyConnectedLayer overload is deprecated") IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, const ConstTensor& weights, - const char* name = nullptr); - - ARMNN_DEPRECATED_MSG("This AddFullyConnectedLayer overload is deprecated") - IConnectableLayer* AddFullyConnectedLayer(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - const ConstTensor& biases, + const Optional& biases, const char* name = nullptr); ARMNN_DEPRECATED_MSG("This AddGatherLayer overload is deprecated") @@ -288,11 +281,6 @@ private: const Optional& biases, const char* name); - IConnectableLayer* AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor, - const Optional& weights, - const Optional& biases, - const char* name); - bool GetShapeInferenceMethod(); NetworkOptions m_NetworkOptions; diff --git a/src/armnn/layers/FullyConnectedLayer.cpp b/src/armnn/layers/FullyConnectedLayer.cpp index 
9d4f57d260..8dfb011730 100644 --- a/src/armnn/layers/FullyConnectedLayer.cpp +++ b/src/armnn/layers/FullyConnectedLayer.cpp @@ -15,24 +15,20 @@ namespace armnn { FullyConnectedLayer::FullyConnectedLayer(const FullyConnectedDescriptor& param, const char* name) - : LayerWithParameters(param.GetNumViews(), 1, LayerType::FullyConnected, param, name) + : LayerWithParameters(param.GetNumInputs(), 1, LayerType::FullyConnected, param, name) { } std::unique_ptr FullyConnectedLayer::CreateWorkload(const IWorkloadFactory& factory) const { - // on this level constant data should not be released.. FullyConnectedQueueDescriptor descriptor; - if (m_Param.m_ConstantWeights) + if (m_Weight) { - ARMNN_ASSERT_MSG(m_Weight != nullptr, "FullyConnectedLayer: Weights data should not be null."); descriptor.m_Weight = m_Weight.get(); - - if (m_Param.m_BiasEnabled) - { - ARMNN_ASSERT_MSG(m_Bias != nullptr, "FullyConnectedLayer: Bias data should not be null."); - descriptor.m_Bias = m_Bias.get(); - } + } + if (m_Param.m_BiasEnabled && m_Bias) + { + descriptor.m_Bias = m_Bias.get(); } SetAdditionalInfo(descriptor); @@ -42,15 +38,6 @@ std::unique_ptr FullyConnectedLayer::CreateWorkload(const IWorkloadFa FullyConnectedLayer* FullyConnectedLayer::Clone(Graph& graph) const { auto layer = CloneBase(graph, m_Param, GetName()); - if (m_Param.m_ConstantWeights) - { - layer->m_Weight = m_Weight ? m_Weight : nullptr; - - if (layer->m_Param.m_BiasEnabled) - { - layer->m_Bias = m_Bias ? 
m_Bias : nullptr; - } - } return std::move(layer); } @@ -73,20 +60,9 @@ void FullyConnectedLayer::ValidateTensorShapesFromInputs() VerifyShapeInferenceType(outputShape, m_ShapeInferenceMethod); - std::vector inferredShapes; - if (m_Param.m_ConstantWeights) - { - // check if m_Weight data is not nullptr - ARMNN_ASSERT_MSG(m_Weight != nullptr, "FullyConnectedLayer: Weights data should not be null."); - - inferredShapes = InferOutputShapes({GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(), - m_Weight->GetTensorInfo().GetShape()}); - } - else - { - inferredShapes = InferOutputShapes({GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(), - GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape()}); - } + std::vector inferredShapes = InferOutputShapes( + {GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape(), + GetInputSlot(1).GetConnection()->GetTensorInfo().GetShape()}); ARMNN_ASSERT(inferredShapes.size() == 1); ARMNN_ASSERT(inferredShapes[0].GetDimensionality() == Dimensionality::Specified); @@ -101,45 +77,12 @@ Layer::ConstantTensors FullyConnectedLayer::GetConstantTensorsByRef() void FullyConnectedLayer::Accept(ILayerVisitor& visitor) const { - Optional optionalWeightsTensor = EmptyOptional(); - Optional optionalBiasTensor = EmptyOptional(); - - ManagedConstTensorHandle managedWeight(m_Weight); - ManagedConstTensorHandle managedBias(m_Bias); - if (GetParameters().m_ConstantWeights) - { - ConstTensor weightsTensor(managedWeight.GetTensorInfo(), managedWeight.Map()); - optionalWeightsTensor = Optional(weightsTensor); - - if (GetParameters().m_BiasEnabled) - { - ConstTensor biasTensor(managedBias.GetTensorInfo(), managedBias.Map()); - optionalBiasTensor = Optional(biasTensor); - } - } - - visitor.VisitFullyConnectedLayer(this, - GetParameters(), - optionalWeightsTensor.value(), - optionalBiasTensor, - GetName()); + visitor.VisitFullyConnectedLayer(this, GetParameters(), GetName()); } void FullyConnectedLayer::ExecuteStrategy(IStrategy& 
strategy) const { - std::vector constTensors; - ManagedConstTensorHandle managedWeight(m_Weight); - ManagedConstTensorHandle managedBias(m_Bias); - - if(GetParameters().m_ConstantWeights) - { - constTensors.emplace_back(ConstTensor(managedWeight.GetTensorInfo(), managedWeight.Map())); - if (GetParameters().m_BiasEnabled) - { - constTensors.emplace_back(ConstTensor(managedBias.GetTensorInfo(), managedBias.Map())); - } - } - strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); + strategy.ExecuteStrategy(this, GetParameters(), {}, GetName()); } } // namespace armnn diff --git a/src/armnn/layers/FullyConnectedLayer.hpp b/src/armnn/layers/FullyConnectedLayer.hpp index 7fc7b0d596..5639bf27b4 100644 --- a/src/armnn/layers/FullyConnectedLayer.hpp +++ b/src/armnn/layers/FullyConnectedLayer.hpp @@ -16,8 +16,10 @@ class FullyConnectedLayer : public LayerWithParameters { public: /// A unique pointer to store Weight values. + /// @Note: Deprecated. Weights are stored in ConstantLayers now. std::shared_ptr m_Weight; /// A unique pointer to store Bias values. + /// @Note: Deprecated. Biases are stored in ConstantLayers now. std::shared_ptr m_Bias; /// Makes a workload for the FullyConnected type. 
diff --git a/src/armnn/test/ConstTensorLayerVisitor.cpp b/src/armnn/test/ConstTensorLayerVisitor.cpp index baafcf41ef..d3d8698972 100644 --- a/src/armnn/test/ConstTensorLayerVisitor.cpp +++ b/src/armnn/test/ConstTensorLayerVisitor.cpp @@ -484,16 +484,23 @@ TEST_CASE("CheckFullyConnectedLayer") { FullyConnectedDescriptor descriptor; descriptor.m_TransposeWeightMatrix = true; + descriptor.m_ConstantWeights = true; + descriptor.m_BiasEnabled = false; std::vector data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; std::vector dimensions = {1, 1, 3, 3}; ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32), data); - TestFullyConnectedLayerVistor visitor(descriptor, weights, EmptyOptional()); + TestConstantLayerVisitor weightsVisitor(weights); + TestFullyConnectedLayerVistor visitor(descriptor); NetworkImpl net; - IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, weights, EmptyOptional()); + IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights); + IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + + weightsLayer->Accept(weightsVisitor); layer->Accept(visitor); } @@ -502,16 +509,23 @@ TEST_CASE("CheckNamedFullyConnectedLayer") const char* layerName = "FullyConnectedLayer"; FullyConnectedDescriptor descriptor; descriptor.m_TransposeWeightMatrix = true; + descriptor.m_ConstantWeights = true; + descriptor.m_BiasEnabled = false; std::vector data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; std::vector dimensions = {1, 1, 3, 3}; ConstTensor weights(TensorInfo(4, dimensions.data(), DataType::Float32), data); - TestFullyConnectedLayerVistor visitor(descriptor, weights, EmptyOptional(), layerName); + TestConstantLayerVisitor weightsVisitor(weights); + TestFullyConnectedLayerVistor visitor(descriptor, layerName); NetworkImpl net; - IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, weights, EmptyOptional(), 
layerName); + IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights); + IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, layerName); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + + weightsLayer->Accept(weightsVisitor); layer->Accept(visitor); } @@ -519,6 +533,7 @@ TEST_CASE("CheckFullyConnectedLayerWithBiases") { FullyConnectedDescriptor descriptor; descriptor.m_TransposeWeightMatrix = true; + descriptor.m_ConstantWeights = true; descriptor.m_BiasEnabled = true; std::vector data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; @@ -528,13 +543,21 @@ TEST_CASE("CheckFullyConnectedLayerWithBiases") std::vector biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; std::vector biasDimensions = {1, 1, 3, 3}; ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32), biasData); - Optional optionalBiases(biases); - TestFullyConnectedLayerVistor visitor(descriptor, weights, optionalBiases); + TestConstantLayerVisitor weightsVisitor(weights); + TestConstantLayerVisitor biasesVisitor(biases); + TestFullyConnectedLayerVistor visitor(descriptor); NetworkImpl net; - IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, weights, optionalBiases); + IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights); + IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases); + IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2)); + + weightsLayer->Accept(weightsVisitor); + biasesLayer->Accept(biasesVisitor); layer->Accept(visitor); } @@ -543,6 +566,7 @@ TEST_CASE("CheckNamedFullyConnectedLayerWithBiases") const char* layerName = "FullyConnectedLayer"; FullyConnectedDescriptor descriptor; descriptor.m_TransposeWeightMatrix = true; + descriptor.m_ConstantWeights = true; descriptor.m_BiasEnabled = true; std::vector data = 
{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; @@ -552,13 +576,21 @@ TEST_CASE("CheckNamedFullyConnectedLayerWithBiases") std::vector biasData = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; std::vector biasDimensions = {1, 1, 3, 3}; ConstTensor biases(TensorInfo(4, biasDimensions.data(), DataType::Float32), biasData); - Optional optionalBiases(biases); - TestFullyConnectedLayerVistor visitor(descriptor, weights, optionalBiases, layerName); + TestConstantLayerVisitor weightsVisitor(weights); + TestConstantLayerVisitor biasesVisitor(biases); + TestFullyConnectedLayerVistor visitor(descriptor, layerName); NetworkImpl net; - IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, weights, optionalBiases, layerName); + IConnectableLayer* const weightsLayer = net.AddConstantLayer(weights); + IConnectableLayer* const biasesLayer = net.AddConstantLayer(biases); + IConnectableLayer* const layer = net.AddFullyConnectedLayer(descriptor, layerName); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); + biasesLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2)); + + weightsLayer->Accept(weightsVisitor); + biasesLayer->Accept(biasesVisitor); layer->Accept(visitor); } diff --git a/src/armnn/test/ConstTensorLayerVisitor.hpp b/src/armnn/test/ConstTensorLayerVisitor.hpp index e423e0f6e3..35e2e872f7 100644 --- a/src/armnn/test/ConstTensorLayerVisitor.hpp +++ b/src/armnn/test/ConstTensorLayerVisitor.hpp @@ -90,36 +90,26 @@ class TestFullyConnectedLayerVistor : public TestLayerVisitor { public: explicit TestFullyConnectedLayerVistor(const FullyConnectedDescriptor& descriptor, - const ConstTensor& weights, - const Optional biases, const char* name = nullptr) : TestLayerVisitor(name) , m_Descriptor(descriptor) - , m_Weights(weights) - , m_Biases(biases) {} virtual ~TestFullyConnectedLayerVistor() {} void VisitFullyConnectedLayer(const IConnectableLayer* layer, const FullyConnectedDescriptor& fullyConnectedDescriptor, - const ConstTensor& weights, - 
const Optional& biases, const char* name = nullptr) override { CheckLayerPointer(layer); CheckLayerName(name); CheckDescriptor(fullyConnectedDescriptor); - CheckConstTensors(m_Weights, weights); - CheckOptionalConstTensors(m_Biases, biases); } protected: void CheckDescriptor(const FullyConnectedDescriptor& descriptor); private: FullyConnectedDescriptor m_Descriptor; - ConstTensor m_Weights; - Optional m_Biases; }; class TestBatchNormalizationLayerVisitor : public TestLayerVisitor diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp index b07e3b80a5..759ada97cd 100644 --- a/src/armnn/test/CreateWorkload.hpp +++ b/src/armnn/test/CreateWorkload.hpp @@ -1193,7 +1193,7 @@ std::unique_ptr CreateFullyConnectedWorkloadTest(armnn:: { // Creates the layer we're testing. FullyConnectedDescriptor layerDesc; - layerDesc.m_BiasEnabled = true; + layerDesc.m_BiasEnabled = false; layerDesc.m_TransposeWeightMatrix = true; FullyConnectedLayer* const layer = graph.AddLayer(layerDesc, "layer"); @@ -1201,17 +1201,24 @@ std::unique_ptr CreateFullyConnectedWorkloadTest(armnn:: float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0; float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0; + // As optimization isn't run member variables need to be updated. layer->m_Weight = std::make_unique(TensorInfo({7, 20}, DataType, inputsQScale, 0)); - layer->m_Bias = std::make_unique(TensorInfo({7}, GetBiasDataType(DataType), inputsQScale)); layer->m_Weight->Allocate(); - layer->m_Bias->Allocate(); + + armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale); + weightsTensorInfo.SetConstant(); // Creates extra layers. Layer* const input = graph.AddLayer(0, "input"); + auto const weights = graph.AddLayer("weights"); Layer* const output = graph.AddLayer(0, "output"); + weights->m_LayerOutput = std::make_unique(weightsTensorInfo); + weights->m_LayerOutput->Allocate(); + // Connects up. 
- Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale)); + Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0); + Connect(weights, layer, weightsTensorInfo, 0, 1); Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale)); CreateTensorHandles(graph, factory); @@ -1219,13 +1226,10 @@ std::unique_ptr CreateFullyConnectedWorkloadTest(armnn:: auto workload = MakeAndCheckWorkload(*layer, factory); FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); - CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true); CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true); - CHECK(queueDescriptor.m_Inputs.size() == 1); + CHECK(queueDescriptor.m_Inputs.size() == 2); CHECK(queueDescriptor.m_Outputs.size() == 1); - CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({7, 20}, DataType, inputsQScale))); - CHECK((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({7}, GetBiasDataType(DataType), inputsQScale))); // Returns so we can do extra, backend-specific tests. return workload; @@ -1246,11 +1250,17 @@ std::unique_ptr CreateFullyConnectedWithBlobWorkloadTest float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0; float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0; + // As optimization isn't run member variables need to be updated. 
layer->m_Weight = std::make_unique(TensorInfo({7, 20}, DataType, inputsQScale, 0)); layer->m_Bias = std::make_unique(TensorInfo({7}, GetBiasDataType(DataType), inputsQScale)); layer->m_Weight->Allocate(); layer->m_Bias->Allocate(); + armnn::TensorInfo weightsTensorInfo({7, 20}, DataType, inputsQScale); + armnn::TensorInfo biasesTensorInfo({7}, GetBiasDataType(DataType), inputsQScale); + weightsTensorInfo.SetConstant(); + biasesTensorInfo.SetConstant(); + auto activationDesc = std::make_shared(); activationDesc->m_A = 10.0f; activationDesc->m_B = 5.0f; @@ -1267,10 +1277,19 @@ std::unique_ptr CreateFullyConnectedWithBlobWorkloadTest // Creates extra layers. Layer* const input = graph.AddLayer(0, "input"); + auto const weights = graph.AddLayer("weights"); + auto const biases = graph.AddLayer("biases"); Layer* const output = graph.AddLayer(0, "output"); + weights->m_LayerOutput = std::make_unique(weightsTensorInfo); + weights->m_LayerOutput->Allocate(); + biases->m_LayerOutput = std::make_unique(biasesTensorInfo); + biases->m_LayerOutput->Allocate(); + // Connects up. - Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale)); + Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0); + Connect(weights, layer, weightsTensorInfo, 0, 1); + Connect(biases, layer, biasesTensorInfo, 0, 2); Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale)); CreateTensorHandles(graph, factory); @@ -1290,10 +1309,52 @@ std::unique_ptr CreateFullyConnectedWithBlobWorkloadTest CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true); CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true); - CHECK(queueDescriptor.m_Inputs.size() == 1); + CHECK(queueDescriptor.m_Inputs.size() == 3); + CHECK(queueDescriptor.m_Outputs.size() == 1); + + // Returns so we can do extra, backend-specific tests. 
+ return workload; +} + +template +std::unique_ptr CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest + (armnn::IWorkloadFactory& factory, + armnn::Graph& graph) +{ + // Creates the layer we're testing. + FullyConnectedDescriptor layerDesc; + layerDesc.m_BiasEnabled = true; + layerDesc.m_TransposeWeightMatrix = true; + layerDesc.m_ConstantWeights = false; + + FullyConnectedLayer* const layer = graph.AddLayer(layerDesc, "layer"); + + float inputsQScale = DataType == armnn::DataType::QAsymmU8 ? 1.0f : 0.0; + float outputQScale = DataType == armnn::DataType::QAsymmU8 ? 2.0f : 0.0; + + // Creates extra layers with weights and biases as input layers. + Layer* const input = graph.AddLayer(1, "input"); + Layer* const weights = graph.AddLayer(2, "weights"); + Layer* const biases = graph.AddLayer(3, "biases"); + Layer* const output = graph.AddLayer(0, "output"); + + // Connects up. + Connect(input, layer, TensorInfo({3, 1, 4, 5}, DataType, inputsQScale), 0, 0); + Connect(weights, layer, TensorInfo({7, 20}, DataType, inputsQScale), 0, 1); + Connect(biases, layer, TensorInfo({7}, GetBiasDataType(DataType), inputsQScale), 0, 2); + Connect(layer, output, TensorInfo({3, 7}, DataType, outputQScale)); + CreateTensorHandles(graph, factory); + + // Makes the workload and checks it. + auto workload = MakeAndCheckWorkload(*layer, factory); + + FullyConnectedQueueDescriptor queueDescriptor = workload->GetData(); + + CHECK(queueDescriptor.m_Parameters.m_BiasEnabled == true); + CHECK(queueDescriptor.m_Parameters.m_TransposeWeightMatrix == true); + CHECK(queueDescriptor.m_Parameters.m_ConstantWeights == false); + CHECK(queueDescriptor.m_Inputs.size() == 3); CHECK(queueDescriptor.m_Outputs.size() == 1); - CHECK((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({7, 20}, DataType, inputsQScale))); - CHECK((queueDescriptor.m_Bias->GetTensorInfo() == TensorInfo({7}, GetBiasDataType(DataType), inputsQScale))); // Returns so we can do extra, backend-specific tests. 
return workload; diff --git a/src/armnn/test/GraphTests.cpp b/src/armnn/test/GraphTests.cpp index 0dc2619e51..b697f6dbe6 100644 --- a/src/armnn/test/GraphTests.cpp +++ b/src/armnn/test/GraphTests.cpp @@ -598,14 +598,14 @@ TEST_CASE("CheckGraphConstTensorSharing") { armnn::Graph graph1; - armnn::FullyConnectedLayer* const fcLayer = - graph1.AddLayer(armnn::FullyConnectedDescriptor(), "fc"); + armnn::ConstantLayer* const constantLayer = graph1.AddLayer("ConstantLayer"); float weight = 1.0f; armnn::ConstTensor constTensor({{ 1, 1 }, armnn::DataType::Float32}, &weight); - fcLayer->m_Weight = std::make_shared(constTensor);; + constantLayer->m_LayerOutput = std::make_shared(constTensor);; + // point sharedWeightPtr to graph1's const tensor - sharedWeightPtr = fcLayer->m_Weight->GetConstTensor(); + sharedWeightPtr = constantLayer->m_LayerOutput->GetConstTensor(); graph0 = armnn::Graph(graph1); // graph1 goes out of scope diff --git a/src/armnn/test/NetworkTests.cpp b/src/armnn/test/NetworkTests.cpp index d763a85100..9acb60df4a 100644 --- a/src/armnn/test/NetworkTests.cpp +++ b/src/armnn/test/NetworkTests.cpp @@ -86,12 +86,15 @@ TEST_CASE("NetworkModification") inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0)); armnn::FullyConnectedDescriptor fullyConnectedDesc; + + // Constant layer that now holds weights data for FullyConnected + armnn::IConnectableLayer* const constantWeightsLayer = net.AddConstantLayer(weights, "const weights"); armnn::IConnectableLayer* const fullyConnectedLayer = net.AddFullyConnectedLayer(fullyConnectedDesc, - weights, - armnn::EmptyOptional(), "fully connected"); + CHECK(constantWeightsLayer); CHECK(fullyConnectedLayer); + constantWeightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1)); convLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0)); armnn::Pooling2dDescriptor pooling2dDesc; @@ -152,11 +155,12 @@ TEST_CASE("NetworkModification") 
multiplicationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); //Tests that all layers are present in the graph. - CHECK(net.GetGraph().GetNumLayers() == 11); + CHECK(net.GetGraph().GetNumLayers() == 12); //Tests that the vertices exist and have correct names. CHECK(GraphHasNamedLayer(net.GetGraph(), "input layer")); CHECK(GraphHasNamedLayer(net.GetGraph(), "conv layer")); + CHECK(GraphHasNamedLayer(net.GetGraph(), "const weights")); CHECK(GraphHasNamedLayer(net.GetGraph(), "fully connected")); CHECK(GraphHasNamedLayer(net.GetGraph(), "pooling2d")); CHECK(GraphHasNamedLayer(net.GetGraph(), "activation")); @@ -200,6 +204,28 @@ TEST_CASE("NetworkModification") CHECK(&srcLayer->GetOutputSlot(0) == tgtLayer->GetInputSlot(i).GetConnection()); } }; + auto checkOneOutputToTwoInputConnectionForTwoDifferentLayers = [] + (const armnn::IConnectableLayer* const srcLayer1, + const armnn::IConnectableLayer* const srcLayer2, + const armnn::IConnectableLayer* const tgtLayer, + int expectedSrcNumInputs1 = 1, + int expectedSrcNumInputs2 = 1, + int expectedDstNumOutputs = 1) + { + CHECK(srcLayer1->GetNumInputSlots() == expectedSrcNumInputs1); + CHECK(srcLayer1->GetNumOutputSlots() == 1); + CHECK(srcLayer2->GetNumInputSlots() == expectedSrcNumInputs2); + CHECK(srcLayer2->GetNumOutputSlots() == 1); + CHECK(tgtLayer->GetNumInputSlots() == 2); + CHECK(tgtLayer->GetNumOutputSlots() == expectedDstNumOutputs); + + CHECK(srcLayer1->GetOutputSlot(0).GetNumConnections() == 1); + CHECK(srcLayer2->GetOutputSlot(0).GetNumConnections() == 1); + CHECK(srcLayer1->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(0)); + CHECK(srcLayer2->GetOutputSlot(0).GetConnection(0) == &tgtLayer->GetInputSlot(1)); + CHECK(&srcLayer1->GetOutputSlot(0) == tgtLayer->GetInputSlot(0).GetConnection()); + CHECK(&srcLayer2->GetOutputSlot(0) == tgtLayer->GetInputSlot(1).GetConnection()); + }; CHECK(AreAllLayerInputSlotsConnected(*convLayer)); 
CHECK(AreAllLayerInputSlotsConnected(*fullyConnectedLayer)); @@ -214,8 +240,8 @@ TEST_CASE("NetworkModification") // Checks connectivity. checkOneOutputToOneInputConnection(inputLayer, convLayer, 0); - checkOneOutputToOneInputConnection(convLayer, fullyConnectedLayer); - checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer); + checkOneOutputToTwoInputConnectionForTwoDifferentLayers(convLayer, constantWeightsLayer, fullyConnectedLayer, 1, 0); + checkOneOutputToOneInputConnection(fullyConnectedLayer, poolingLayer, 2, 1); checkOneOutputToOneInputConnection(poolingLayer, activationLayer); checkOneOutputToOneInputConnection(activationLayer, normalizationLayer); checkOneOutputToOneInputConnection(normalizationLayer, softmaxLayer); diff --git a/src/armnn/test/ShapeInferenceTests.cpp b/src/armnn/test/ShapeInferenceTests.cpp index 8abcfd7595..d3c928fec1 100644 --- a/src/armnn/test/ShapeInferenceTests.cpp +++ b/src/armnn/test/ShapeInferenceTests.cpp @@ -401,24 +401,16 @@ TEST_CASE("FloorTest") TEST_CASE("FullyConnectedTest") { - Graph graph; - const unsigned int inputWidth = 3u; const unsigned int inputHeight = 2u; const unsigned int inputChannels = 1u; const unsigned int outputChannels = 2u; - auto layer = BuildGraph(&graph, - {{1, inputChannels, inputHeight, inputWidth}}, - FullyConnectedDescriptor(), - "fc"); - - - const float Datum = 0.0f; - ConstTensor weights({{inputChannels, outputChannels}, DataType::Float32}, &Datum); - layer->m_Weight = std::make_unique(weights); - - RunShapeInferenceTest(layer, {{ 1, outputChannels }}); + CreateGraphAndRunTest({{ 1, inputChannels, inputHeight, inputWidth }, // input + { inputChannels, outputChannels }}, // weights + {{ 1, outputChannels }}, // output + FullyConnectedDescriptor(), + "fc"); } TEST_CASE("GatherTest") diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp index 24ea8f6680..2352a3c498 100644 --- 
a/src/armnn/test/optimizations/FuseActivationTests.cpp +++ b/src/armnn/test/optimizations/FuseActivationTests.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "test/GraphUtils.hpp" #include #include @@ -41,6 +42,7 @@ struct Convolution2dTest { using LayerType = Convolution2dLayer; static const bool isElementWise = false; + static const bool isConstTensorAsInputSupported = false; static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 4}); } // NHWCout @@ -70,6 +72,16 @@ struct Convolution2dTest return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name); } + + static std::vector AddConstantLayers(INetwork* network, + float scale = 1.f, + int32_t offset = 0) + { + IgnoreUnused(network); + IgnoreUnused(scale); + IgnoreUnused(offset); + return {}; + } }; template> @@ -78,6 +90,7 @@ struct DWConvolution2dTest public: using LayerType = DepthwiseConvolution2dLayer; static const bool isElementWise = false; + static const bool isConstTensorAsInputSupported = false; static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // [N,H,W,Cin] static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // [N,H,W,Cout] @@ -108,6 +121,16 @@ public: return network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBias, name); } + + static std::vector AddConstantLayers(INetwork* network, + float scale = 1.f, + int32_t offset = 0) + { + IgnoreUnused(network); + IgnoreUnused(scale); + IgnoreUnused(offset); + return {}; + } }; template> @@ -116,6 +139,7 @@ struct FullyConnectedTest public: using LayerType = FullyConnectedLayer; static const bool isElementWise = false; + static const bool isConstTensorAsInputSupported = true; static TensorShape GetInputShape() { return TensorShape( {2, 5, 1, 1}); } // NCinHW static TensorShape GetOutputShape() { return TensorShape( {2, 3}); } // NCout @@ -129,18 +153,31 @@ public: float 
scale = 1.f, int32_t offset = 0) { + IgnoreUnused(scale); + IgnoreUnused(offset); + FullyConnectedDescriptor descriptor; descriptor.m_BiasEnabled = false; + return network->AddFullyConnectedLayer(descriptor, name); + } + + static std::vector AddConstantLayers(INetwork* network, + float scale = 1.f, + int32_t offset = 0) + { std::vector weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15}; + 11, 12, 13, 14, 15}; std::vector weightsVector = armnnUtils::QuantizedVector(weightsData, scale, offset); - TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset); + TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true); ConstTensor weights(weightsInfo, weightsVector); - Optional optionalBias; - return network->AddFullyConnectedLayer(descriptor, weights, optionalBias, name); + IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights"); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo); + + std::vector layers = { weightsLayer }; + return layers; } }; @@ -150,6 +187,7 @@ struct BatchNormTest public: using LayerType = BatchNormalizationLayer; static const bool isElementWise = false; + static const bool isConstTensorAsInputSupported = false; static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout @@ -181,6 +219,16 @@ public: return network->AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma, name); } + + static std::vector AddConstantLayers(INetwork* network, + float scale = 1.f, + int32_t offset = 0) + { + IgnoreUnused(network); + IgnoreUnused(scale); + IgnoreUnused(offset); + return {}; + } }; template> @@ -188,6 +236,7 @@ struct MultiplicationTest { using LayerType = MultiplicationLayer; static const bool isElementWise = true; + static const bool isConstTensorAsInputSupported = false; static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin static 
TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout @@ -205,6 +254,16 @@ struct MultiplicationTest return network->AddMultiplicationLayer(name); } + + static std::vector AddConstantLayers(INetwork* network, + float scale = 1.f, + int32_t offset = 0) + { + IgnoreUnused(network); + IgnoreUnused(scale); + IgnoreUnused(offset); + return {}; + } }; template> @@ -212,6 +271,7 @@ struct AdditionTest { using LayerType = AdditionLayer; static const bool isElementWise = true; + static const bool isConstTensorAsInputSupported = false; static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout @@ -229,6 +289,16 @@ struct AdditionTest return network->AddAdditionLayer(name); } + + static std::vector AddConstantLayers(INetwork* network, + float scale = 1.f, + int32_t offset = 0) + { + IgnoreUnused(network); + IgnoreUnused(scale); + IgnoreUnused(offset); + return {}; + } }; template> @@ -236,6 +306,7 @@ struct SubtractionTest { using LayerType = SubtractionLayer; static const bool isElementWise = true; + static const bool isConstTensorAsInputSupported = false; static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout @@ -253,6 +324,16 @@ struct SubtractionTest return network->AddSubtractionLayer(name); } + + static std::vector AddConstantLayers(INetwork* network, + float scale = 1.f, + int32_t offset = 0) + { + IgnoreUnused(network); + IgnoreUnused(scale); + IgnoreUnused(offset); + return {}; + } }; template> @@ -260,6 +341,7 @@ struct DivisionTest { using LayerType = DivisionLayer; static const bool isElementWise = true; + static const bool isConstTensorAsInputSupported = false; static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } 
// NHWCout @@ -277,11 +359,21 @@ struct DivisionTest return network->AddDivisionLayer(name); } + + static std::vector AddConstantLayers(INetwork* network, + float scale = 1.f, + int32_t offset = 0) + { + IgnoreUnused(network); + IgnoreUnused(scale); + IgnoreUnused(offset); + return {}; + } }; template -INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool preventFusing, +INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preventFusing, float scale, int32_t offset) { // Create a network @@ -300,6 +392,20 @@ INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool prevent IConnectableLayer* outputLayer = network->AddOutputLayer(0); IConnectableLayer* output2Layer = preventFusing?network->AddOutputLayer(1):nullptr; + // If ConstTensorAsInputs is supported weights and bias are stored as constant layers. + if(LayerTest::isConstTensorAsInputSupported) + { + std::vector constantLayers = LayerTest::AddConstantLayers(network.get(), + scale, + offset); + + // Connect constant layers to receiverLayer. 
+ for (unsigned int i = 0; i < constantLayers.size(); ++i) + { + constantLayers[i]->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(i + 1)); + } + } + // Define layers information TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset); TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset); @@ -335,7 +441,7 @@ void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescript { // FIRST NETWORK: Fused // Construct ArmNN network - INetworkPtr networkFused = CreatNetwork(activationDescriptor, false, scale, offset); + INetworkPtr networkFused = CreateNetwork(activationDescriptor, false, scale, offset); // Create ArmNN runtime IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options @@ -350,12 +456,31 @@ void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescript (layer->GetNameStr() == "fused-activation-into-receiverLayer"); }; - CHECK(3 == graphFused.GetNumLayers()); - CHECK(CheckSequence(graphFused.cbegin(), - graphFused.cend(), - &IsLayerOfType, - checkFusedConv2d, - &IsLayerOfType)); + // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers. + if(LayerTest::isConstTensorAsInputSupported) + { + CHECK(4 == graphFused.GetNumLayers()); + CHECK(CheckSequence(graphFused.cbegin(), + graphFused.cend(), + &IsLayerOfType, + &IsLayerOfType, + checkFusedConv2d, + &IsLayerOfType)); + + // Check if new constant layer is connected to fused receiver layer. 
+ Layer* fusedReceiverLayer = GetFirstLayerWithName(graphFused, "fused-activation-into-receiverLayer"); + CHECK(fusedReceiverLayer); + CHECK(fusedReceiverLayer->GetInputSlot(1).GetConnection() != nullptr); + } + else + { + CHECK(3 == graphFused.GetNumLayers()); + CHECK(CheckSequence(graphFused.cbegin(), + graphFused.cend(), + &IsLayerOfType, + checkFusedConv2d, + &IsLayerOfType)); + } // Load network into runtime NetworkId networkIdentifier; @@ -376,7 +501,7 @@ void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescript // SECOND NETWORK: NotFused // Construct ArmNN network - INetworkPtr networkNotFused = CreatNetwork(activationDescriptor, true, scale, offset); + INetworkPtr networkNotFused = CreateNetwork(activationDescriptor, true, scale, offset); // Create ArmNN runtime IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options @@ -386,14 +511,30 @@ void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescript Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get()); - CHECK(5 == graphNotFused.GetNumLayers()); - CHECK(CheckSequence(graphNotFused.cbegin(), - graphNotFused.cend(), - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType, - &IsLayerOfType)); + // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers. 
+ if(LayerTest::isConstTensorAsInputSupported) + { + CHECK(6 == graphNotFused.GetNumLayers()); + CHECK(CheckSequence(graphNotFused.cbegin(), + graphNotFused.cend(), + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); + } + else + { + CHECK(5 == graphNotFused.GetNumLayers()); + CHECK(CheckSequence(graphNotFused.cbegin(), + graphNotFused.cend(), + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType, + &IsLayerOfType)); + } // Load network into runtime NetworkId networkIdentifierNotFused; @@ -433,7 +574,7 @@ bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute try { // Construct ArmNN network - INetworkPtr networkFused = CreatNetwork(activationDescriptor, false, scale, offset); + INetworkPtr networkFused = CreateNetwork(activationDescriptor, false, scale, offset); // Create ArmNN runtime IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp index 2d9194a350..5c99496744 100644 --- a/src/armnnDeserializer/Deserializer.cpp +++ b/src/armnnDeserializer/Deserializer.cpp @@ -688,6 +688,7 @@ armnn::ConstTensor ToConstTensor(ConstTensorRawPtr constTensorPtr) { CHECK_CONST_TENSOR_PTR(constTensorPtr); armnn::TensorInfo tensorInfo = ToTensorInfo(constTensorPtr->info()); + tensorInfo.SetConstant(); switch (constTensorPtr->data_type()) { @@ -938,6 +939,7 @@ IDeserializer::DeserializerImpl::FeatureVersions IDeserializer::DeserializerImpl { versions.m_BindingIdScheme = graph->featureVersions()->bindingIdsScheme(); versions.m_WeightsLayoutScheme = graph->featureVersions()->weightsLayoutScheme(); + versions.m_ConstTensorsAsInputs = graph->featureVersions()->constantTensorsAsInputs(); } return versions; @@ -1052,13 +1054,15 @@ void IDeserializer::DeserializerImpl::RegisterOutputSlots(GraphPtr graph, } void 
IDeserializer::DeserializerImpl::RegisterInputSlots(GraphPtr graph, - uint32_t layerIndex, - armnn::IConnectableLayer* layer) + uint32_t layerIndex, + armnn::IConnectableLayer* layer, + std::vector ignoreSlots) { CHECK_LAYERS(graph, 0, layerIndex); ARMNN_ASSERT(layer != nullptr); LayerBaseRawPtr baseLayer = GetBaseLayer(graph, layerIndex); - if (baseLayer->inputSlots()->size() != layer->GetNumInputSlots()) + + if (baseLayer->inputSlots()->size() != (layer->GetNumInputSlots() - ignoreSlots.size())) { throw ParseException(fmt::format("The number of inputslots ({0}) does not match the number expected ({1})" " for layer index:{2} {3}", @@ -1070,10 +1074,14 @@ void IDeserializer::DeserializerImpl::RegisterInputSlots(GraphPtr graph, for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) { - auto fbInputSlot = baseLayer->inputSlots()->Get(i); - auto fbConnection = fbInputSlot->connection(); - armnn::IInputSlot* inputSlot = &(layer->GetInputSlot(fbInputSlot->index())); - RegisterInputSlotOfConnection(fbConnection->sourceLayerIndex(), fbConnection->outputSlotIndex(), inputSlot); + // Check if slot should be ignored. 
+ if (std::find(ignoreSlots.begin(), ignoreSlots.end(), i) == ignoreSlots.end()) + { + auto fbInputSlot = baseLayer->inputSlots()->Get(i); + auto fbConnection = fbInputSlot->connection(); + armnn::IInputSlot* inputSlot = &(layer->GetInputSlot(fbInputSlot->index())); + RegisterInputSlotOfConnection(fbConnection->sourceLayerIndex(), fbConnection->outputSlotIndex(), inputSlot); + } } } @@ -1924,40 +1932,47 @@ void IDeserializer::DeserializerImpl::ParseFullyConnected(GraphPtr graph, unsign fullyConnectedDescriptor.m_BiasEnabled = flatBufferDescriptor->biasEnabled(); fullyConnectedDescriptor.m_TransposeWeightMatrix = flatBufferDescriptor->transposeWeightsMatrix(); fullyConnectedDescriptor.m_ConstantWeights = flatBufferDescriptor->constantWeights(); - uint32_t numInputs = 1; - if (!fullyConnectedDescriptor.m_ConstantWeights) + + armnn::IConnectableLayer* layer; + std::vector ignoreSlots {}; + + // Weights and biases used to be always constant and were stored as members of the layer. This has changed and + // they are now passed as inputs. If they are constant then they will be stored in a ConstantLayer. 
+ if (this->GetFeatureVersions(graph).m_ConstTensorsAsInputs <= 0) { - numInputs = 2; + // If the model stores weights and biases as members of the layer we have to read them from there + // but add them to their own ConstantLayer for compatibility + CHECK_VALID_SIZE(inputs.size(), 1); + layer = m_Network->AddFullyConnectedLayer(fullyConnectedDescriptor, + layerName.c_str()); + + armnn::ConstTensor weightsTensor = ToConstTensor(flatBufferLayer->weights()); + auto weightsLayer = m_Network->AddConstantLayer(weightsTensor); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u)); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsTensor.GetInfo()); + ignoreSlots.emplace_back(1u); + if (fullyConnectedDescriptor.m_BiasEnabled) { - numInputs = 3; + armnn::ConstTensor biasTensor = ToConstTensor(flatBufferLayer->biases()); + auto biasLayer = m_Network->AddConstantLayer(biasTensor); + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u)); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensor.GetInfo()); + ignoreSlots.emplace_back(2u); } } - CHECK_VALID_SIZE(inputs.size(), numInputs); - - armnn::Optional optionalWeights = armnn::EmptyOptional(); - armnn::Optional optionalBiases = armnn::EmptyOptional(); - if (fullyConnectedDescriptor.m_ConstantWeights) + else { - armnn::ConstTensor weightsTensorData = ToConstTensor(flatBufferLayer->weights()); - optionalWeights = armnn::Optional(weightsTensorData); - - if (flatBufferDescriptor->biasEnabled()) - { - armnn::ConstTensor biasTensorData = ToConstTensor(flatBufferLayer->biases()); - optionalBiases = armnn::Optional(biasTensorData); - } + layer = m_Network->AddFullyConnectedLayer(fullyConnectedDescriptor, + layerName.c_str()); + uint32_t numInputs = fullyConnectedDescriptor.GetNumInputs(); + CHECK_VALID_SIZE(inputs.size(), numInputs); } - armnn::IConnectableLayer* layer = m_Network->AddFullyConnectedLayer(fullyConnectedDescriptor, - optionalWeights, - optionalBiases, - layerName.c_str()); - armnn::TensorInfo 
outputTensorInfo = ToTensorInfo(outputs[0]); layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo); - RegisterInputSlots(graph, layerIndex, layer); + RegisterInputSlots(graph, layerIndex, layer, ignoreSlots); RegisterOutputSlots(graph, layerIndex, layer); } diff --git a/src/armnnDeserializer/Deserializer.hpp b/src/armnnDeserializer/Deserializer.hpp index b1362c44b6..b4dc68b72d 100644 --- a/src/armnnDeserializer/Deserializer.hpp +++ b/src/armnnDeserializer/Deserializer.hpp @@ -143,9 +143,12 @@ private: void ParseTransposeConvolution2d(GraphPtr graph, unsigned int layerIndex); void ParseUnidirectionalSequenceLstm(GraphPtr graph, unsigned int layerIndex); - void RegisterInputSlots(GraphPtr graph, uint32_t layerIndex, - armnn::IConnectableLayer* layer); - void RegisterOutputSlots(GraphPtr graph, uint32_t layerIndex, + void RegisterInputSlots(GraphPtr graph, + uint32_t layerIndex, + armnn::IConnectableLayer* layer, + std::vector ignoreSlots = {}); + void RegisterOutputSlots(GraphPtr graph, + uint32_t layerIndex, armnn::IConnectableLayer* layer); // NOTE index here must be from flatbuffer object index property @@ -171,6 +174,9 @@ private: // Default values to zero for backward compatibility unsigned int m_WeightsLayoutScheme = 0; + + // Default values to zero for backward compatibility + unsigned int m_ConstTensorsAsInputs = 0; }; FeatureVersions GetFeatureVersions(GraphPtr graph); diff --git a/src/armnnDeserializer/test/DeserializeFullyConnected.cpp b/src/armnnDeserializer/test/DeserializeFullyConnected.cpp index da2db08fd4..5e298d1525 100644 --- a/src/armnnDeserializer/test/DeserializeFullyConnected.cpp +++ b/src/armnnDeserializer/test/DeserializeFullyConnected.cpp @@ -117,22 +117,241 @@ struct FullyConnectedFixture : public ParserFlatbuffersSerializeFixture } }; + +struct FullyConnectedFixtureConstantAsInput : public ParserFlatbuffersSerializeFixture +{ + explicit FullyConnectedFixtureConstantAsInput() + { + m_JsonString = R"( + { + "layers": [ + { + "layer_type": 
"InputLayer", + "layer": { + "base": { + "base": { + "index": 0, + "layerName": "InputLayer", + "layerType": "Input", + "inputSlots": [ + + ], + "outputSlots": [ + { + "index": 0, + "tensorInfo": { + "dimensions": [ + 1, + 4, + 1, + 1 + ], + "dataType": "QAsymmU8", + "quantizationScale": 1.0, + "quantizationOffset": 0, + "quantizationDim": 0, + "dimensionality": 1, + "dimensionSpecificity": [ + true, + true, + true, + true + ] + } + } + ] + }, + "layerBindingId": 0 + } + } + }, + { + "layer_type": "FullyConnectedLayer", + "layer": { + "base": { + "index": 1, + "layerName": "FullyConnectedLayer", + "layerType": "FullyConnected", + "inputSlots": [ + { + "index": 0, + "connection": { + "sourceLayerIndex": 0, + "outputSlotIndex": 0 + } + }, + { + "index": 1, + "connection": { + "sourceLayerIndex": 2, + "outputSlotIndex": 0 + } + } + ], + "outputSlots": [ + { + "index": 0, + "tensorInfo": { + "dimensions": [ + 1, + 1 + ], + "dataType": "QAsymmU8", + "quantizationScale": 2.0, + "quantizationOffset": 0, + "quantizationDim": 0, + "dimensionality": 1, + "dimensionSpecificity": [ + true, + true + ] + } + } + ] + }, + "descriptor": { + "biasEnabled": false, + "transposeWeightsMatrix": true, + "constantWeights": true + } + } + }, + { + "layer_type": "ConstantLayer", + "layer": { + "base": { + "index": 2, + "layerName": "", + "layerType": "Constant", + "inputSlots": [ + + ], + "outputSlots": [ + { + "index": 0, + "tensorInfo": { + "dimensions": [ + 1, + 4 + ], + "dataType": "QAsymmU8", + "quantizationScale": 1.0, + "quantizationOffset": 0, + "quantizationDim": 0, + "dimensionality": 1, + "dimensionSpecificity": [ + true, + true + ], + "isConstant": true, + } + } + ] + }, + "input": { + "info": { + "dimensions": [ + 1, + 4 + ], + "dataType": "QAsymmU8", + "quantizationScale": 1.0, + "quantizationOffset": 0, + "quantizationDim": 0, + "dimensionality": 1, + "dimensionSpecificity": [ + true, + true + ] + }, + "data_type": "ByteData", + "data": { + "data": [ + 2, + 3, + 4, + 5 + ] + 
} + } + } + }, + { + "layer_type": "OutputLayer", + "layer": { + "base": { + "base": { + "index": 3, + "layerName": "OutputLayer", + "layerType": "Output", + "inputSlots": [ + { + "index": 0, + "connection": { + "sourceLayerIndex": 1, + "outputSlotIndex": 0 + } + } + ], + "outputSlots": [ + + ] + }, + "layerBindingId": 0 + } + } + } + ], + "inputIds": [ + 0 + ], + "outputIds": [ + 0 + ], + "featureVersions": { + "bindingIdsScheme": 1, + "weightsLayoutScheme": 1, + "constantTensorsAsInputs": 1 + } + } + )"; + Setup(); + } +}; + struct FullyConnectedWithNoBiasFixture : FullyConnectedFixture { FullyConnectedWithNoBiasFixture() - : FullyConnectedFixture("[ 1, 4, 1, 1 ]", // inputShape - "[ 1, 1 ]", // outputShape - "[ 1, 4 ]", // filterShape - "QuantisedAsymm8") // filterData + : FullyConnectedFixture("[ 1, 4, 1, 1 ]", // inputShape + "[ 1, 1 ]", // outputShape + "[ 1, 4 ]", // filterShape + "QuantisedAsymm8") // filterData {} }; TEST_CASE_FIXTURE(FullyConnectedWithNoBiasFixture, "FullyConnectedWithNoBias") +{ + // Weights and biases used to be always constant and were stored as members of the layer. This has changed and + // they are now passed as inputs (ConstantLayer) but the old way can still be used for now. 
+ RunTest<2, armnn::DataType::QAsymmU8>( + 0, + {{"InputLayer", { 10, 20, 30, 40 }}}, + {{"OutputLayer", { 400/2 }}}); +} + +struct FullyConnectedWithNoBiasFixtureConstantAsInput : FullyConnectedFixtureConstantAsInput +{ + FullyConnectedWithNoBiasFixtureConstantAsInput() + : FullyConnectedFixtureConstantAsInput() + {} +}; + +TEST_CASE_FIXTURE(FullyConnectedWithNoBiasFixtureConstantAsInput, "FullyConnectedWithNoBiasConstantAsInput") { RunTest<2, armnn::DataType::QAsymmU8>( - 0, - {{"InputLayer", { 10, 20, 30, 40 }}}, - {{"OutputLayer", { 400/2 }}}); + 0, + {{"InputLayer", { 10, 20, 30, 40 }}}, + {{"OutputLayer", { 400/2 }}}); } } diff --git a/src/armnnOnnxParser/OnnxParser.cpp b/src/armnnOnnxParser/OnnxParser.cpp index 1fb5b96b8f..a7e6902fdd 100644 --- a/src/armnnOnnxParser/OnnxParser.cpp +++ b/src/armnnOnnxParser/OnnxParser.cpp @@ -532,6 +532,9 @@ OnnxParserImpl::CreateConstTensor(const std::string name, TensorInfo tensorInfo = *m_TensorsInfo[name].m_info; onnx::TensorProto onnxTensor = *m_TensorsInfo[name].m_tensor; + // Makes sure IsConstant flag is set. + tensorInfo.SetConstant(); + // Const tensors requires at least a list of values if (tensorInfo.GetNumElements() == 0) { @@ -972,27 +975,41 @@ void OnnxParserImpl::AddFullyConnected(const onnx::NodeProto& matmulNode, const m_TensorsInfo[biasName].m_dtype ), CHECK_LOCATION().AsString())); } - layer = m_Network->AddFullyConnectedLayer(desc, - CreateConstTensor(weightName).first, - Optional(CreateConstTensor(biasName).first), - matmulNode.name().c_str()); + + // Just add a FullyConnected layer, weights and biases are handled as inputs now. 
+ layer = m_Network->AddFullyConnectedLayer(desc, matmulNode.name().c_str()); ARMNN_ASSERT(layer != nullptr); auto outputInfo = ComputeOutputInfo({addNode->output(0)}, layer, {m_TensorsInfo[inputName].m_info->GetShape(), m_TensorsInfo[weightName].m_info->GetShape()}); - layer->GetOutputSlot(0).SetTensorInfo(outputInfo[0]); - RegisterInputSlots(layer, {inputName}); + // Add constant layer to store weights/biases and connect to FullyConnected layer. + if(m_TensorsInfo[weightName].isConstant()) + { + IConnectableLayer* weightsLayer = m_Network->AddConstantLayer(CreateConstTensor(weightName).first); + + weightInfo.SetConstant(); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightInfo); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u)); + } + + if(m_TensorsInfo[biasName].isConstant()) + { + IConnectableLayer* biasLayer = m_Network->AddConstantLayer(CreateConstTensor(biasName).first); + + biasInfo.SetConstant(); + biasLayer->GetOutputSlot(0).SetTensorInfo(biasInfo); + biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u)); + } + + RegisterInputSlots(layer, {inputName, weightName, biasName}); RegisterOutputSlots(layer, {addNode->output(0)}); } else { - layer = m_Network->AddFullyConnectedLayer(desc, - CreateConstTensor(weightName).first, - EmptyOptional(), - matmulNode.name().c_str()); + layer = m_Network->AddFullyConnectedLayer(desc, matmulNode.name().c_str()); ARMNN_ASSERT(layer != nullptr); auto outputInfo = ComputeOutputInfo({matmulNode.output(0)}, layer, @@ -1000,7 +1017,18 @@ void OnnxParserImpl::AddFullyConnected(const onnx::NodeProto& matmulNode, const m_TensorsInfo[weightName].m_info->GetShape()}); layer->GetOutputSlot(0).SetTensorInfo(outputInfo[0]); - RegisterInputSlots(layer, {inputName}); + // Add constant layer to store weights and connect to FullyConnected layer. 
+ if(m_TensorsInfo[weightName].isConstant()) + { + TensorInfo weightInfo = *m_TensorsInfo[weightName].m_info; + IConnectableLayer* weightsLayer = m_Network->AddConstantLayer(CreateConstTensor(weightName).first); + + weightInfo.SetConstant(); + weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u)); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightInfo); + } + + RegisterInputSlots(layer, {inputName, weightName}); RegisterOutputSlots(layer, {matmulNode.output(0)}); } } @@ -1755,6 +1783,7 @@ void OnnxParserImpl::RegisterInputSlots(IConnectableLayer* layer, const std::vec layer->GetNumInputSlots(), CHECK_LOCATION().AsString())); } + for (unsigned int slotIndex = 0; slotIndex < layer->GetNumInputSlots(); ++slotIndex) { std::string tensorId = tensorIds[slotIndex]; diff --git a/src/armnnSerializer/ArmnnSchema.fbs b/src/armnnSerializer/ArmnnSchema.fbs index a544161c53..85435a366f 100644 --- a/src/armnnSerializer/ArmnnSchema.fbs +++ b/src/armnnSerializer/ArmnnSchema.fbs @@ -69,6 +69,7 @@ table TensorInfo { quantizationDim:uint; dimensionality:uint = 1; dimensionSpecificity:[bool]; + isConstant:bool = false; } struct Connection { @@ -324,7 +325,7 @@ table FloorLayer{ table FullyConnectedLayer { base:LayerBase; descriptor:FullyConnectedDescriptor; - weights:ConstTensor; + weights:ConstTensor; // ConstTensors are now passed as inputs. 
biases:ConstTensor; } @@ -1007,6 +1008,7 @@ table AnyLayer { table FeatureCompatibilityVersions { bindingIdsScheme:uint = 0; weightsLayoutScheme:uint = 0; + constantTensorsAsInputs:uint = 0; } // Root type for serialized data is the graph of the network diff --git a/src/armnnSerializer/ArmnnSchema_generated.h b/src/armnnSerializer/ArmnnSchema_generated.h index 27550f0682..ca2bf0c003 100644 --- a/src/armnnSerializer/ArmnnSchema_generated.h +++ b/src/armnnSerializer/ArmnnSchema_generated.h @@ -1685,7 +1685,8 @@ struct TensorInfo FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VT_QUANTIZATIONSCALES = 12, VT_QUANTIZATIONDIM = 14, VT_DIMENSIONALITY = 16, - VT_DIMENSIONSPECIFICITY = 18 + VT_DIMENSIONSPECIFICITY = 18, + VT_ISCONSTANT = 20 }; const flatbuffers::Vector *dimensions() const { return GetPointer *>(VT_DIMENSIONS); @@ -1711,6 +1712,9 @@ struct TensorInfo FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const flatbuffers::Vector *dimensionSpecificity() const { return GetPointer *>(VT_DIMENSIONSPECIFICITY); } + bool isConstant() const { + return GetField(VT_ISCONSTANT, 0) != 0; + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DIMENSIONS) && @@ -1724,6 +1728,7 @@ struct TensorInfo FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyField(verifier, VT_DIMENSIONALITY) && VerifyOffset(verifier, VT_DIMENSIONSPECIFICITY) && verifier.VerifyVector(dimensionSpecificity()) && + VerifyField(verifier, VT_ISCONSTANT) && verifier.EndTable(); } }; @@ -1756,6 +1761,9 @@ struct TensorInfoBuilder { void add_dimensionSpecificity(flatbuffers::Offset> dimensionSpecificity) { fbb_.AddOffset(TensorInfo::VT_DIMENSIONSPECIFICITY, dimensionSpecificity); } + void add_isConstant(bool isConstant) { + fbb_.AddElement(TensorInfo::VT_ISCONSTANT, static_cast(isConstant), 0); + } explicit TensorInfoBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -1777,7 +1785,8 
@@ inline flatbuffers::Offset CreateTensorInfo( flatbuffers::Offset> quantizationScales = 0, uint32_t quantizationDim = 0, uint32_t dimensionality = 1, - flatbuffers::Offset> dimensionSpecificity = 0) { + flatbuffers::Offset> dimensionSpecificity = 0, + bool isConstant = false) { TensorInfoBuilder builder_(_fbb); builder_.add_dimensionSpecificity(dimensionSpecificity); builder_.add_dimensionality(dimensionality); @@ -1786,6 +1795,7 @@ inline flatbuffers::Offset CreateTensorInfo( builder_.add_quantizationOffset(quantizationOffset); builder_.add_quantizationScale(quantizationScale); builder_.add_dimensions(dimensions); + builder_.add_isConstant(isConstant); builder_.add_dataType(dataType); return builder_.Finish(); } @@ -1799,7 +1809,8 @@ inline flatbuffers::Offset CreateTensorInfoDirect( const std::vector *quantizationScales = nullptr, uint32_t quantizationDim = 0, uint32_t dimensionality = 1, - const std::vector *dimensionSpecificity = nullptr) { + const std::vector *dimensionSpecificity = nullptr, + bool isConstant = false) { auto dimensions__ = dimensions ? _fbb.CreateVector(*dimensions) : 0; auto quantizationScales__ = quantizationScales ? _fbb.CreateVector(*quantizationScales) : 0; auto dimensionSpecificity__ = dimensionSpecificity ? 
_fbb.CreateVector(*dimensionSpecificity) : 0; @@ -1812,7 +1823,8 @@ inline flatbuffers::Offset CreateTensorInfoDirect( quantizationScales__, quantizationDim, dimensionality, - dimensionSpecificity__); + dimensionSpecificity__, + isConstant); } struct ByteData FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { @@ -10124,7 +10136,8 @@ struct FeatureCompatibilityVersions FLATBUFFERS_FINAL_CLASS : private flatbuffer typedef FeatureCompatibilityVersionsBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BINDINGIDSSCHEME = 4, - VT_WEIGHTSLAYOUTSCHEME = 6 + VT_WEIGHTSLAYOUTSCHEME = 6, + VT_CONSTANTTENSORSASINPUTS = 8 }; uint32_t bindingIdsScheme() const { return GetField(VT_BINDINGIDSSCHEME, 0); @@ -10132,10 +10145,14 @@ struct FeatureCompatibilityVersions FLATBUFFERS_FINAL_CLASS : private flatbuffer uint32_t weightsLayoutScheme() const { return GetField(VT_WEIGHTSLAYOUTSCHEME, 0); } + uint32_t constantTensorsAsInputs() const { + return GetField(VT_CONSTANTTENSORSASINPUTS, 0); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_BINDINGIDSSCHEME) && VerifyField(verifier, VT_WEIGHTSLAYOUTSCHEME) && + VerifyField(verifier, VT_CONSTANTTENSORSASINPUTS) && verifier.EndTable(); } }; @@ -10150,6 +10167,9 @@ struct FeatureCompatibilityVersionsBuilder { void add_weightsLayoutScheme(uint32_t weightsLayoutScheme) { fbb_.AddElement(FeatureCompatibilityVersions::VT_WEIGHTSLAYOUTSCHEME, weightsLayoutScheme, 0); } + void add_constantTensorsAsInputs(uint32_t constantTensorsAsInputs) { + fbb_.AddElement(FeatureCompatibilityVersions::VT_CONSTANTTENSORSASINPUTS, constantTensorsAsInputs, 0); + } explicit FeatureCompatibilityVersionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); @@ -10165,8 +10185,10 @@ struct FeatureCompatibilityVersionsBuilder { inline flatbuffers::Offset CreateFeatureCompatibilityVersions( flatbuffers::FlatBufferBuilder &_fbb, 
uint32_t bindingIdsScheme = 0, - uint32_t weightsLayoutScheme = 0) { + uint32_t weightsLayoutScheme = 0, + uint32_t constantTensorsAsInputs = 0) { FeatureCompatibilityVersionsBuilder builder_(_fbb); + builder_.add_constantTensorsAsInputs(constantTensorsAsInputs); builder_.add_weightsLayoutScheme(weightsLayoutScheme); builder_.add_bindingIdsScheme(bindingIdsScheme); return builder_.Finish(); diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp index 44cd1800c4..195b41657a 100644 --- a/src/armnnSerializer/Serializer.cpp +++ b/src/armnnSerializer/Serializer.cpp @@ -1126,7 +1126,6 @@ void SerializerStrategy::SerializeQuantizeLayer(const armnn::IConnectableLayer * // Build FlatBuffer for FullyConnected Layer void SerializerStrategy::SerializeFullyConnectedLayer(const armnn::IConnectableLayer* layer, const armnn::FullyConnectedDescriptor& fullyConnectedDescriptor, - const std::vector& constants, const char*) { // Create FlatBuffer BaseLayer @@ -1139,28 +1138,10 @@ void SerializerStrategy::SerializeFullyConnectedLayer(const armnn::IConnectableL fullyConnectedDescriptor.m_TransposeWeightMatrix, fullyConnectedDescriptor.m_ConstantWeights); - // Create FlatBuffer weights data - flatbuffers::Offset flatBufferWeights; - // Create FlatBuffer bias data - flatbuffers::Offset flatBufferBiases; - if (fullyConnectedDescriptor.m_ConstantWeights && !constants.empty()) - { - armnn::ConstTensor weights = constants.at(0); - flatBufferWeights = CreateConstTensorInfo(weights); - - if (fullyConnectedDescriptor.m_BiasEnabled) - { - armnn::ConstTensor biases = constants.at(1); - flatBufferBiases = CreateConstTensorInfo(biases); - } - } - // Create FlatBuffer FullyConnectedLayer auto flatBufferLayer = serializer::CreateFullyConnectedLayer(m_flatBufferBuilder, flatBufferBaseLayer, - flatBufferDescriptor, - flatBufferWeights, - flatBufferBiases); + flatBufferDescriptor); // Add created FullyConnectedLayer to the FlatBufferLayers CreateAnyLayer(flatBufferLayer.o, 
serializer::Layer::Layer_FullyConnectedLayer); @@ -1916,7 +1897,8 @@ flatbuffers::Offset SerializerStr serializer::CreateFeatureCompatibilityVersions( m_flatBufferBuilder, 1, // Binding ids scheme version - 1 // Weights layout scheme version + 1, // Weights layout scheme version + 1 // Constant tensors as inputs version ); return versionsTable; } @@ -2110,7 +2092,7 @@ void SerializerStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer, { const armnn::FullyConnectedDescriptor& layerDescriptor = static_cast(descriptor); - SerializeFullyConnectedLayer(layer, layerDescriptor, constants, name); + SerializeFullyConnectedLayer(layer, layerDescriptor, name); break; } case armnn::LayerType::Gather : diff --git a/src/armnnSerializer/Serializer.hpp b/src/armnnSerializer/Serializer.hpp index dead8739cc..18b2cc77ac 100644 --- a/src/armnnSerializer/Serializer.hpp +++ b/src/armnnSerializer/Serializer.hpp @@ -184,7 +184,6 @@ private: void SerializeFullyConnectedLayer(const armnn::IConnectableLayer* layer, const armnn::FullyConnectedDescriptor& fullyConnectedDescriptor, - const std::vector& constants, const char* name = nullptr); void SerializeGatherLayer(const armnn::IConnectableLayer* layer, diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp index 98532d0cec..9e9df0d1ea 100644 --- a/src/armnnSerializer/test/SerializerTests.cpp +++ b/src/armnnSerializer/test/SerializerTests.cpp @@ -789,6 +789,41 @@ TEST_CASE("SerializeFloor") deserializedNetwork->ExecuteStrategy(verifier); } +using FullyConnectedDescriptor = armnn::FullyConnectedDescriptor; +class FullyConnectedLayerVerifier : public LayerVerifierBaseWithDescriptor +{ +public: + FullyConnectedLayerVerifier(const std::string& layerName, + const std::vector& inputInfos, + const std::vector& outputInfos, + const FullyConnectedDescriptor& descriptor) + : LayerVerifierBaseWithDescriptor(layerName, inputInfos, outputInfos, descriptor) {} + + void ExecuteStrategy(const 
armnn::IConnectableLayer* layer, + const armnn::BaseDescriptor& descriptor, + const std::vector& constants, + const char* name, + const armnn::LayerBindingId id = 0) override + { + armnn::IgnoreUnused(constants, id); + switch (layer->GetType()) + { + case armnn::LayerType::Input: break; + case armnn::LayerType::Output: break; + case armnn::LayerType::Constant: break; + default: + { + VerifyNameAndConnections(layer, name); + const FullyConnectedDescriptor& layerDescriptor = + static_cast(descriptor); + CHECK(layerDescriptor.m_ConstantWeights == m_Descriptor.m_ConstantWeights); + CHECK(layerDescriptor.m_BiasEnabled == m_Descriptor.m_BiasEnabled); + CHECK(layerDescriptor.m_TransposeWeightMatrix == m_Descriptor.m_TransposeWeightMatrix); + } + } + } +}; + TEST_CASE("SerializeFullyConnected") { const std::string layerName("fullyConnected"); @@ -809,11 +844,16 @@ TEST_CASE("SerializeFullyConnected") armnn::INetworkPtr network = armnn::INetwork::Create(); armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0); + + // Old way of handling constant tensors. 
+ ARMNN_NO_DEPRECATE_WARN_BEGIN armnn::IConnectableLayer* const fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, weights, armnn::Optional(biases), layerName.c_str()); + ARMNN_NO_DEPRECATE_WARN_END + armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0); inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0)); @@ -825,13 +865,11 @@ TEST_CASE("SerializeFullyConnected") armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network)); CHECK(deserializedNetwork); - const std::vector constants {weights, biases}; - LayerVerifierBaseWithDescriptorAndConstants verifier( - layerName, {inputInfo}, {outputInfo}, descriptor, constants); + FullyConnectedLayerVerifier verifier(layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor); deserializedNetwork->ExecuteStrategy(verifier); } -TEST_CASE("SerializeFullyConnectedWeightsAsInputs") +TEST_CASE("SerializeFullyConnectedWeightsAndBiasesAsInputs") { const std::string layerName("fullyConnected_weights_as_inputs"); const armnn::TensorInfo inputInfo ({ 2, 5, 1, 1 }, armnn::DataType::Float32); @@ -854,8 +892,6 @@ TEST_CASE("SerializeFullyConnectedWeightsAsInputs") armnn::IConnectableLayer* const biasInputLayer = network->AddInputLayer(2); armnn::IConnectableLayer* const fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, - weights, - bias, layerName.c_str()); armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0); @@ -878,6 +914,49 @@ TEST_CASE("SerializeFullyConnectedWeightsAsInputs") deserializedNetwork->ExecuteStrategy(verifier); } +TEST_CASE("SerializeFullyConnectedWeightsAndBiasesAsConstantLayers") +{ + const std::string layerName("fullyConnected_weights_as_inputs"); + const armnn::TensorInfo inputInfo ({ 2, 5, 1, 1 }, armnn::DataType::Float32); + const armnn::TensorInfo outputInfo({ 2, 3 }, armnn::DataType::Float32); + + const armnn::TensorInfo weightsInfo({ 5, 3 }, armnn::DataType::Float32); + const 
armnn::TensorInfo biasesInfo ({ 3 }, armnn::DataType::Float32); + + std::vector weightsData = GenerateRandomData(weightsInfo.GetNumElements()); + std::vector biasesData = GenerateRandomData(biasesInfo.GetNumElements()); + armnn::ConstTensor weights(weightsInfo, weightsData); + armnn::ConstTensor biases(biasesInfo, biasesData); + + armnn::FullyConnectedDescriptor descriptor; + descriptor.m_BiasEnabled = true; + descriptor.m_TransposeWeightMatrix = false; + descriptor.m_ConstantWeights = true; + + armnn::INetworkPtr network = armnn::INetwork::Create(); + armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0); + armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights, "Weights"); + armnn::IConnectableLayer* const biasesLayer = network->AddConstantLayer(biases, "Biases"); + armnn::IConnectableLayer* const fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor,layerName.c_str()); + armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0); + + inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0)); + weightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1)); + biasesLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(2)); + fullyConnectedLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); + + inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); + weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo); + biasesLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo); + fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); + + armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network)); + CHECK(deserializedNetwork); + + FullyConnectedLayerVerifier verifier(layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor); + deserializedNetwork->ExecuteStrategy(verifier); +} + TEST_CASE("SerializeGather") { using GatherDescriptor = armnn::GatherDescriptor; diff --git 
a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp index b669ae4efa..3e59244753 100644 --- a/src/armnnTfLiteParser/TfLiteParser.cpp +++ b/src/armnnTfLiteParser/TfLiteParser.cpp @@ -555,6 +555,9 @@ CreateConstTensorImpl(TfLiteParserImpl::BufferRawPtr bufferPtr, ::memcpy(data.get(), bufferPtr->data.data(), tensorInfo.GetNumBytes()); } + // Make sure isConstant flag is set. + tensorInfo.SetConstant(); + return std::make_pair(ConstTensor(tensorInfo, data.get()), std::move(data)); } @@ -2571,42 +2574,26 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator armnn::IConnectableLayer* layer = nullptr; auto layerName = fmt::format("FullyConnected:{}:{}", subgraphIndex, operatorIndex); - Optional filterOptionalConstTensor; + auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); + // Add the first input tensor to the registration list + std::vector tensorIndexesToRegister = {inputTensorIndexes[0]}; + std::vector ignoreInputWhenRegister = {}; desc.m_ConstantWeights = IsConstTensor(inputs[1]); - auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex)); - std::vector tensorIndexesToRegister = {inputTensorIndexes[0]}; - if (desc.m_ConstantWeights) - { - filterOptionalConstTensor = Optional(CreateConstTensorNonPermuted(inputs[1], filterTensorInfo)); - } - else - { - // Non const weights will need to be registered as inputs - tensorIndexesToRegister.emplace_back(inputTensorIndexes[1]); - } + // Add the weights input to the registration list, constant layers will be added by SetupConstantLayers if constant. 
+ tensorIndexesToRegister.emplace_back(inputTensorIndexes[1]); - Optional biasOptionalConstTensor; if (inputs.size() == 3) { desc.m_BiasEnabled = true; - if (desc.m_ConstantWeights) - { - TensorInfo biasTensorInfo = ToTensorInfo(inputs[2]); - biasOptionalConstTensor = Optional(CreateConstTensorNonPermuted(inputs[2], biasTensorInfo)); - } - else - { - // Non const biases will need to be registered as inputs - tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]); - } + + // Add the biases input to the registration list, constant layer will be added by SetupConstantLayers. + tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]); } - layer = m_Network->AddFullyConnectedLayer(desc, - filterOptionalConstTensor, - biasOptionalConstTensor, - layerName.c_str()); + // Filters and biases are always passed to fully connected as inputs + layer = m_Network->AddFullyConnectedLayer(desc, layerName.c_str()); ARMNN_ASSERT(layer != nullptr); armnn::TensorInfo inputTensorInfo = ToTensorInfo(inputs[0]); @@ -3732,6 +3719,7 @@ void TfLiteParserImpl::RegisterInputSlots(size_t subgraphIndex, { CHECK_MODEL(m_Model, subgraphIndex, operatorIndex); ARMNN_ASSERT(layer != nullptr); + if (tensorIndexes.size() + startingSlotIndex != layer->GetNumInputSlots()) { throw ParseException( @@ -3831,19 +3819,27 @@ void TfLiteParserImpl::SetupConstantLayers(size_t subgraphIndex) m_SubgraphConnections[subgraphIndex][tensorIndex].inputSlots.size() > 0) { TensorRawPtr tensorPtr = subgraphPtr->tensors[tensorIndex].get(); - armnn::TensorInfo tensorInfo = ToTensorInfo(tensorPtr); - auto tensorAndData = CreateConstTensorNonPermuted(tensorPtr, tensorInfo); - std::string layerName = fmt::format("Constant:{}", tensorPtr->name); - IConnectableLayer *layer = - m_Network->AddConstantLayer(tensorAndData, layerName.c_str()); + if(IsConstTensor(tensorPtr)) + { + armnn::TensorInfo tensorInfo = ToTensorInfo(tensorPtr); + auto tensorAndData = CreateConstTensorNonPermuted(tensorPtr, tensorInfo); - 
layer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - RegisterOutputSlots(subgraphIndex, - VIRTUAL_OPERATOR_ID, - layer, - { tensorIndex }); + std::string layerName = fmt::format("Constant:{}", tensorPtr->name); + IConnectableLayer *layer = m_Network->AddConstantLayer(tensorAndData, layerName.c_str()); + layer->GetOutputSlot(0).SetTensorInfo(tensorInfo); + RegisterOutputSlots(subgraphIndex, + VIRTUAL_OPERATOR_ID, + layer, + { tensorIndex }); + } + else + { + throw ParseException( + fmt::format("Invalid Tensor: Tensor should be constant. {}", + CHECK_LOCATION().AsString())); + } } } } @@ -3863,6 +3859,9 @@ TfLiteParserImpl::CreateConstTensorAndStoreData(TfLiteParserImpl::BufferRawPtr b armnn::TensorInfo& tensorInfo, armnn::Optional permutationVector) { + // Make sure isConstant flag is set. + tensorInfo.SetConstant(); + auto constData = CreateConstTensorImpl(bufferPtr, tensorPtr, tensorInfo, @@ -3885,7 +3884,6 @@ bool TfLiteParserImpl::IsConstTensor(TensorRawPtr tensorPtr) return isConst; } - std::pair TfLiteParserImpl::CreateConstTensorPermuted(TensorRawPtr tensorPtr, armnn::TensorInfo& tensorInfo, @@ -3895,6 +3893,9 @@ TfLiteParserImpl::CreateConstTensorPermuted(TensorRawPtr tensorPtr, auto bufferPtr = GetBuffer(m_Model, tensorPtr->buffer); CHECK_BUFFER_SIZE(bufferPtr, tensorInfo, tensorPtr->buffer); + // Make sure isConstant flag is set. + tensorInfo.SetConstant(); + switch (tensorInfo.GetDataType()) { case armnn::DataType::Float32: @@ -3941,6 +3942,9 @@ armnn::ConstTensor TfLiteParserImpl::CreateConstTensorNonPermuted(TensorRawPtr t auto bufferPtr = GetBuffer(m_Model, tensorPtr->buffer); CHECK_BUFFER_SIZE(bufferPtr, tensorInfo, tensorPtr->buffer); + // Make sure isConstant flag is set. 
+ tensorInfo.SetConstant(); + return ConstTensor(tensorInfo, bufferPtr->data.data()); } diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp index 3fe0823b03..319cdb106b 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -1041,15 +1041,12 @@ void FullyConnectedQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) c { const std::string descriptorName{"FullyConnectedQueueDescriptor"}; - uint32_t numInputs = 1; - if (!m_Parameters.m_ConstantWeights) + uint32_t numInputs = 2; + if (m_Parameters.m_BiasEnabled) { - numInputs = 2; - if (m_Parameters.m_BiasEnabled) - { - numInputs = 3; - } + numInputs = 3; } + ValidateNumInputs(workloadInfo, descriptorName, numInputs); ValidateNumOutputs(workloadInfo, descriptorName, 1); @@ -1063,30 +1060,12 @@ void FullyConnectedQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) c throw InvalidArgumentException(descriptorName + ": Input tensor must have 2 or 4 dimensions."); } - TensorInfo weightTensorInfo; - if (m_Parameters.m_ConstantWeights) - { - ValidatePointer(m_Weight, descriptorName, "weight"); - weightTensorInfo = m_Weight->GetTensorInfo(); - } - else - { - weightTensorInfo = workloadInfo.m_InputTensorInfos[1]; - } + TensorInfo weightTensorInfo = workloadInfo.m_InputTensorInfos[1]; ValidateTensorNumDimensions(weightTensorInfo, descriptorName, 2, "weight"); if (m_Parameters.m_BiasEnabled) { - TensorInfo biasTensorInfo; - if (m_Parameters.m_ConstantWeights) - { - ValidatePointer(m_Bias, descriptorName, "bias"); - biasTensorInfo = m_Bias->GetTensorInfo(); - } - else - { - biasTensorInfo = workloadInfo.m_InputTensorInfos[2]; - } + TensorInfo biasTensorInfo = workloadInfo.m_InputTensorInfos[2]; // Validates type and quantization values. 
ValidateBiasTensorQuantization(biasTensorInfo, inputTensorInfo, weightTensorInfo, descriptorName); ValidateTensorDataType(biasTensorInfo, GetBiasDataType(inputTensorInfo.GetDataType()), descriptorName, "bias"); @@ -1894,11 +1873,9 @@ void FloorQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const }; ValidateDataTypes(inputTensorInfo, supportedTypes, descriptorName); - - if (inputTensorInfo != outputTensorInfo) - { - throw InvalidArgumentException(descriptorName + ": Input and output tensor infos do not match."); - } + ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); + ValidateTensorShapesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); + ValidateTensorQuantizationSpace(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output"); } void LstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp index 1c18551679..3f5972dab6 100644 --- a/src/backends/backendsCommon/WorkloadFactory.cpp +++ b/src/backends/backendsCommon/WorkloadFactory.cpp @@ -36,7 +36,11 @@ const TensorInfo OverrideDataType(const TensorInfo& info, Optional typ return info; } - return TensorInfo(info.GetShape(), type.value(), info.GetQuantizationScale(), info.GetQuantizationOffset()); + return TensorInfo(info.GetShape(), + type.value(), + info.GetQuantizationScale(), + info.GetQuantizationOffset(), + info.IsConstant()); } } // anonymous namespace @@ -364,16 +368,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, TensorInfo weightsInfo; const TensorInfo* weightsInfoPtr = nullptr; - if (descriptor.m_ConstantWeights) - { - ARMNN_ASSERT(cLayer->m_Weight.get() != nullptr); - weightsInfo = OverrideDataType(cLayer->m_Weight->GetTensorInfo(), dataType); - } - else - { - weightsInfo = OverrideDataType(layer.GetInputSlot(1).GetConnection()->GetTensorInfo(), 
dataType); - - } + weightsInfo = OverrideDataType(layer.GetInputSlot(1).GetConnection()->GetTensorInfo(), dataType); weightsInfoPtr = &weightsInfo; TensorInfo biasInfo; @@ -385,17 +380,8 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, if (descriptor.m_BiasEnabled) { - if(descriptor.m_ConstantWeights) - { - ARMNN_ASSERT(cLayer->m_Bias.get() != nullptr); - biasInfo = OverrideDataType(cLayer->m_Bias->GetTensorInfo(), GetBiasTypeFromWeightsType(dataType)); - biasInfoPtr = &biasInfo; - } - else - { - biasInfo = OverrideDataType(layer.GetInputSlot(2).GetConnection()->GetTensorInfo(), dataType); - biasInfoPtr = &biasInfo; - } + biasInfo = OverrideDataType(layer.GetInputSlot(2).GetConnection()->GetTensorInfo(), dataType); + biasInfoPtr = &biasInfo; } else { diff --git a/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp index 923d6f3641..af6b56852a 100644 --- a/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp +++ b/src/backends/backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp @@ -28,10 +28,7 @@ armnn::INetworkPtr CreateFullyConnectedNetworkNonConstWeights(const armnn::Tenso armnn::IConnectableLayer* inputLayer = network->AddInputLayer(0, "Input"); armnn::IConnectableLayer* weightsInputLayer = network->AddInputLayer(1, "Weights_Input"); - armnn::IConnectableLayer* fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, - armnn::EmptyOptional(), - armnn::EmptyOptional(), - "Fully_Connected"); + armnn::IConnectableLayer* fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, "Fully_Connected"); armnn::IConnectableLayer* outputLayer = network->AddOutputLayer(0, "Output"); Connect(inputLayer, fullyConnectedLayer, inputTensorInfo, 0, 0); @@ -41,6 +38,52 @@ armnn::INetworkPtr CreateFullyConnectedNetworkNonConstWeights(const armnn::Tenso return network; } +armnn::INetworkPtr 
CreateFullyConnectedNetworkNonConstWeightsConstBias(const armnn::TensorInfo& inputTensorInfo, + const armnn::TensorInfo& outputTensorInfo, + const armnn::TensorInfo& weightsTensorInfo, + const armnn::TensorInfo& biasTensorInfo, + const armnn::ConstTensor& biasConstantTensor, + armnn::FullyConnectedDescriptor descriptor) +{ + armnn::INetworkPtr network(armnn::INetwork::Create()); + + armnn::IConnectableLayer* inputLayer = network->AddInputLayer(0, "Input"); + armnn::IConnectableLayer* weightsInputLayer = network->AddInputLayer(1, "Weights_Input"); + armnn::IConnectableLayer* biasLayer = network->AddConstantLayer(biasConstantTensor, "Weights"); + armnn::IConnectableLayer* fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, "Fully_Connected"); + armnn::IConnectableLayer* outputLayer = network->AddOutputLayer(0, "Output"); + + Connect(inputLayer, fullyConnectedLayer, inputTensorInfo, 0, 0); + Connect(weightsInputLayer, fullyConnectedLayer, weightsTensorInfo, 0, 1); + Connect(biasLayer, fullyConnectedLayer, biasTensorInfo, 0, 2); + Connect(fullyConnectedLayer, outputLayer, outputTensorInfo, 0, 0); + + return network; +} + +armnn::INetworkPtr CreateFullyConnectedNetworkConstWeightsNonConstBias(const armnn::TensorInfo& inputTensorInfo, + const armnn::TensorInfo& outputTensorInfo, + const armnn::TensorInfo& weightsTensorInfo, + const armnn::TensorInfo& biasTensorInfo, + const armnn::ConstTensor& weightsConstantTensor, + armnn::FullyConnectedDescriptor descriptor) +{ + armnn::INetworkPtr network(armnn::INetwork::Create()); + + armnn::IConnectableLayer* inputLayer = network->AddInputLayer(0, "Input"); + armnn::IConnectableLayer* weightsLayer = network->AddConstantLayer(weightsConstantTensor, "Weights"); + armnn::IConnectableLayer* biasLayer = network->AddInputLayer(2, "Bias_Input"); + armnn::IConnectableLayer* fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor, "Fully_Connected"); + armnn::IConnectableLayer* outputLayer = 
network->AddOutputLayer(0, "Output"); + + Connect(inputLayer, fullyConnectedLayer, inputTensorInfo, 0, 0); + Connect(weightsLayer, fullyConnectedLayer, weightsTensorInfo, 0, 1); + Connect(biasLayer, fullyConnectedLayer, biasTensorInfo, 0, 2); + Connect(fullyConnectedLayer, outputLayer, outputTensorInfo, 0, 0); + + return network; +} + template> void FullyConnectedWithDynamicWeightsEndToEnd(const std::vector& backends) { @@ -94,4 +137,123 @@ void FullyConnectedWithDynamicWeightsEndToEnd(const std::vector> +void FullyConnectedWithDynamicOrConstantInputsEndToEnd(const std::vector& backends, + const bool transposeWeights, + const bool constantWeightsOrBias) +{ + unsigned int inputWidth = 1; + unsigned int inputHeight = 1; + unsigned int inputChannels = 5; + unsigned int inputNum = 2; + + unsigned int outputChannels = 3; + unsigned int outputNum = 2; + + unsigned int inputShape[] = { inputNum, inputChannels, inputHeight, inputWidth }; + unsigned int outputShape[] = { outputNum, outputChannels }; + unsigned int weightsShape[] = { inputChannels, outputChannels }; + + if (transposeWeights) + { + std::swap(weightsShape[0], weightsShape[1]); + } + + unsigned int biasShape[] = { outputChannels }; + + armnn::TensorInfo inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32); + armnn::TensorInfo outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::DataType::Float32); + armnn::TensorInfo weightsDesc = armnn::TensorInfo(2, weightsShape, armnn::DataType::Float32); + armnn::TensorInfo biasesDesc = armnn::TensorInfo(1, biasShape, armnn::DataType::Float32); + + std::vector input = + { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 5.0f, 4.0f, 3.0f, 2.0f, 1.0f + }; + + std::vector weights = + { + .5f, 2.f, .5f, + .5f, 2.f, 1.f, + .5f, 2.f, 2.f, + .5f, 2.f, 3.f, + .5f, 2.f, 4.f + }; + + if (transposeWeights) + { + weights = + { + .5f, .5f, .5f, .5f, .5f, + 2.f, 2.f, 2.f, 2.f, 2.f, + .5f, 1.f, 2.f, 3.f, 4.f + }; + } + + std::vector biasValues = std::vector({10.f, 20.f, 
30.f}); + + std::vector expectedOutput = + { + 0.5f + 1.0f + 1.5f + 2.0f + 2.5f + biasValues[0], + 2.0f + 4.0f + 6.0f + 8.0f + 10.f + biasValues[1], + 0.5f + 2.0f + 6.0f + 12.f + 20.f + biasValues[2], + + 2.5f + 2.0f + 1.5f + 1.0f + 0.5f + biasValues[0], + 10.0f + 8.0f + 6.0f + 4.0f + 2.f + biasValues[1], + 2.5f + 4.0f + 6.0f + 6.f + 4.f + biasValues[2] + }; + + FullyConnectedDescriptor descriptor; + descriptor.m_BiasEnabled = true; + descriptor.m_TransposeWeightMatrix = transposeWeights; + descriptor.m_ConstantWeights = constantWeightsOrBias; + + if (!constantWeightsOrBias) + { + // Tests non constant weights and constant bias. + ConstTensor biasConstantTensor(biasesDesc, biasValues.data()); + + armnn::INetworkPtr network = CreateFullyConnectedNetworkNonConstWeightsConstBias(inputTensorInfo, + outputTensorInfo, + weightsDesc, + biasesDesc, + biasConstantTensor, + descriptor); + CHECK(network); + + std::map> inputTensorData = {{ 0, input }, {1, weights}}; + std::map> expectedOutputTensorData = {{ 0, expectedOutput }}; + + EndToEndLayerTestImpl(move(network), + inputTensorData, + expectedOutputTensorData, + backends, + 1.0f); + } + else + { + // Tests constant weights and non constant bias. 
+ ConstTensor weightsConstantTensor(weightsDesc, weights.data()); + + armnn::INetworkPtr network = CreateFullyConnectedNetworkConstWeightsNonConstBias(inputTensorInfo, + outputTensorInfo, + weightsDesc, + biasesDesc, + weightsConstantTensor, + descriptor); + CHECK(network); + + std::map> inputTensorData = {{ 0, input }, {2, biasValues}}; + std::map> expectedOutputTensorData = {{ 0, expectedOutput }}; + + EndToEndLayerTestImpl(move(network), + inputTensorData, + expectedOutputTensorData, + backends, + 1.0f); + } +} + } // anonymous namespace diff --git a/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp index c47048e566..dcf87fe92b 100644 --- a/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp +++ b/src/backends/backendsCommon/test/layerTests/FullyConnectedTestImpl.cpp @@ -22,56 +22,6 @@ template LayerTestResult SimpleFullyConnectedTestImpl( - armnn::IWorkloadFactory& workloadFactory, - const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, - const armnn::ITensorHandleFactory& tensorHandleFactory, - armnn::TensorInfo inputTensorInfo, - armnn::TensorInfo outputTensorInfo, - armnn::TensorInfo weightsDesc, - armnn::TensorInfo biasesDesc, - std::vector& weights, - std::vector& bias, - std::vector& input, - bool biasEnabled, - bool transposeWeights) -{ - std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); - std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo); - - armnn::FullyConnectedQueueDescriptor data; - armnn::WorkloadInfo info; - armnn::ScopedTensorHandle weightsTensor(weightsDesc); - armnn::ScopedTensorHandle biasTensor(biasesDesc); - - std::vector actualOutput(outputTensorInfo.GetNumElements()); - - AllocateAndCopyDataToITensorHandle(&weightsTensor, weights.data()); - AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data()); - - AddInputToWorkload(data, info, 
inputTensorInfo, inputHandle.get()); - AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); - data.m_Weight = &weightsTensor; - data.m_Bias = &biasTensor; - data.m_Parameters.m_BiasEnabled = biasEnabled; - data.m_Parameters.m_TransposeWeightMatrix = transposeWeights; - - std::unique_ptr workload = workloadFactory.CreateFullyConnected(data, info); - LayerTestResult result(outputTensorInfo); - - inputHandle->Allocate(); - outputHandle->Allocate(); - CopyDataToITensorHandle(inputHandle.get(), input.data()); - - ExecuteWorkload(*workload, memoryManager); - - CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get()); - result.m_ActualData = actualOutput; - - return result; -} - -template -LayerTestResult SimpleFullyConnectedTestWeightsAsInputsImpl( armnn::IWorkloadFactory& workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const armnn::ITensorHandleFactory& tensorHandleFactory, @@ -83,7 +33,8 @@ LayerTestResult SimpleFullyConnectedTestWeightsAsInputsImpl( std::vector& bias, std::vector& input, bool biasEnabled, - bool transposeWeights) + bool transposeWeights, + bool constantWeights) { std::unique_ptr input0Handle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); std::unique_ptr input1Handle = tensorHandleFactory.CreateTensorHandle(weightsTensorInfo); @@ -93,13 +44,23 @@ LayerTestResult SimpleFullyConnectedTestWeightsAsInputsImpl( armnn::FullyConnectedQueueDescriptor data; armnn::WorkloadInfo info; + armnn::ScopedTensorHandle weightsTensor(weightsTensorInfo); + armnn::ScopedTensorHandle biasTensor(biasesTensorInfo); + + AllocateAndCopyDataToITensorHandle(&weightsTensor, weights.data()); + AllocateAndCopyDataToITensorHandle(&biasTensor, bias.data()); AddInputToWorkload(data, info, inputTensorInfo, input0Handle.get()); AddInputToWorkload(data, info, weightsTensorInfo, input1Handle.get()); AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get()); + + // Need to set as layer members will 
be null when creating the workload because the optimization hasn't been run. + data.m_Weight = &weightsTensor; + data.m_Bias = &biasTensor; + data.m_Parameters.m_BiasEnabled = biasEnabled; data.m_Parameters.m_TransposeWeightMatrix = transposeWeights; - data.m_Parameters.m_ConstantWeights = false; + data.m_Parameters.m_ConstantWeights = constantWeights; std::unique_ptr input2Handle = nullptr; if (biasEnabled) @@ -180,36 +141,19 @@ LayerTestResult FullyConnectedTest( std::vector bias = {9250, 67500}; - if (constantWeights) - { - result = SimpleFullyConnectedTestImpl(workloadFactory, - memoryManager, - tensorHandleFactory, - inputTensorInfo, - outputTensorInfo, - weightsDesc, - biasesDesc, - weights, - bias, - input, - biasEnabled, - true); - } - else - { - result = SimpleFullyConnectedTestWeightsAsInputsImpl(workloadFactory, - memoryManager, - tensorHandleFactory, - inputTensorInfo, - outputTensorInfo, - weightsDesc, - biasesDesc, - weights, - bias, - input, - biasEnabled, - true); - } + result = SimpleFullyConnectedTestImpl(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + weightsDesc, + biasesDesc, + weights, + bias, + input, + biasEnabled, + true, + constantWeights); if (biasEnabled) { @@ -299,7 +243,7 @@ LayerTestResult FullyConnectedLargeTestCommon( inputTensorInfo, outputTensorInfo, weightsDesc, biasesDesc, weights, biasValues, input, - true, transposeWeights + true, transposeWeights, true ); result.m_ExpectedData = armnnUtils::QuantizedVector({ 965432.0f }, qScale, qOffset); @@ -408,7 +352,7 @@ LayerTestResult FullyConnectedFloat32Test( inputTensorInfo, outputTensorInfo, weightsDesc, biasesDesc, weights, biasValues, input, - biasEnabled, transposeWeights + biasEnabled, transposeWeights, true ); std::vector expectedOutput = diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp index 441f4ebdf4..2855957e31 100644 --- a/src/backends/reference/RefBackend.hpp +++ 
b/src/backends/reference/RefBackend.hpp @@ -12,7 +12,8 @@ namespace armnn const BackendCapabilities cpuRefCapabilities("CpuRef", { {"NonConstWeights", true}, - {"AsyncExecution", true} + {"AsyncExecution", true}, + {"ConstantTensorsAsInputs", true} }); const std::set oldCpuRefCapabilities { diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp index 4293ef54f3..fae8d0cdd4 100644 --- a/src/backends/reference/test/RefCreateWorkloadTests.cpp +++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp @@ -486,6 +486,24 @@ TEST_CASE("RefCreateFullyConnectedWithBlobWorkloadTest") TensorInfo({ 3, 7 }, armnn::DataType::Float32, outputQScale)); } +TEST_CASE("CreateFullyConnectedWorkloadWeightsBiasesAsInputsFloat32") +{ + Graph graph; + RefWorkloadFactory factory = GetFactory(); + + auto workload = + CreateFullyConnectedWorkloadWeightsBiasesAsInputsTest(factory, graph); + + // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest). 
+ float inputsQScale = 0.0f; + float outputQScale = 0.0f; + CheckInputsOutput(std::move(workload), + TensorInfo({ 3, 1, 4, 5 }, armnn::DataType::Float32, inputsQScale), + TensorInfo({ 7, 20 }, armnn::DataType::Float32, inputsQScale), + TensorInfo({ 3, 7 }, armnn::DataType::Float32, outputQScale)); +} + template static void RefCreateFullyConnectedWorkloadTest() { diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp index 69a2048078..424df977c8 100644 --- a/src/backends/reference/test/RefEndToEndTests.cpp +++ b/src/backends/reference/test/RefEndToEndTests.cpp @@ -600,11 +600,21 @@ TEST_CASE("RefFillEndToEndTestInt32") FillEndToEnd(defaultBackends); } -TEST_CASE("RefFullyConnectedEndToEndTestInt32") +TEST_CASE("RefFullyConnectedEndToEndTestFloat32") { FullyConnectedWithDynamicWeightsEndToEnd(defaultBackends); } +TEST_CASE("RefFullyConnectedEndToEndTestNonConstantWeightsConstantBiasesFloat32") +{ + FullyConnectedWithDynamicOrConstantInputsEndToEnd(defaultBackends, true, true); +} + +TEST_CASE("RefFullyConnectedEndToEndTestConstantWeightsNonConstantBiasesFloat32") +{ + FullyConnectedWithDynamicOrConstantInputsEndToEnd(defaultBackends, true, false); +} + TEST_CASE("RefGatherFloatTest") { GatherEndToEnd(defaultBackends); diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp index 99e3eab075..5a7951ec48 100644 --- a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp +++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp @@ -16,20 +16,6 @@ RefFullyConnectedWorkload::RefFullyConnectedWorkload( const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { - if (descriptor.m_Parameters.m_ConstantWeights) - { - m_Weight = std::make_unique(*(descriptor.m_Weight)); - const TensorInfo& rWeightInfo = m_Weight->GetTensorInfo(); - m_WeightShape = 
rWeightInfo.GetShape(); - m_WeightDecoder = MakeDecoder(rWeightInfo, m_Weight->Map(true)); - - if (descriptor.m_Parameters.m_BiasEnabled) - { - m_Bias = std::make_unique(*(descriptor.m_Bias)); - const TensorInfo& biasInfo = m_Bias->GetTensorInfo(); - m_BiasDecoder = MakeDecoder(biasInfo, m_Bias->Map(true)); - } - } } void RefFullyConnectedWorkload::PostAllocationConfigure() @@ -44,18 +30,15 @@ void RefFullyConnectedWorkload::PostAllocationConfigure(std::vector 1); m_InputShape = inputInfo.GetShape(); - if (!m_Data.m_Parameters.m_ConstantWeights) + const TensorInfo& rWeightInfo = GetTensorInfo(inputs[1]); + ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1); + m_WeightShape = rWeightInfo.GetShape(); + m_WeightDecoder = MakeDecoder(rWeightInfo); + + if (m_Data.m_Parameters.m_BiasEnabled) { - const TensorInfo& rWeightInfo = GetTensorInfo(inputs[1]); - ARMNN_ASSERT(inputInfo.GetNumDimensions() > 1); - m_WeightShape = rWeightInfo.GetShape(); - m_WeightDecoder = MakeDecoder(rWeightInfo); - - if (m_Data.m_Parameters.m_BiasEnabled) - { - const TensorInfo& biasInfo = GetTensorInfo(inputs[2]); - m_BiasDecoder = MakeDecoder(biasInfo); - } + const TensorInfo& biasInfo = GetTensorInfo(inputs[2]); + m_BiasDecoder = MakeDecoder(biasInfo); } const TensorInfo& outputInfo = GetTensorInfo(outputs[0]); @@ -87,13 +70,10 @@ void RefFullyConnectedWorkload::Execute(std::vector inputs, std: std::unique_ptr> inputDecoder = MakeDecoder(GetTensorInfo(inputs[0]), inputs[0]->Map()); std::unique_ptr> OutputEncoder = MakeEncoder(GetTensorInfo(outputs[0]), outputs[0]->Map()); - if (!m_Data.m_Parameters.m_ConstantWeights) + m_WeightDecoder->Reset(inputs[1]->Map()); + if (m_Data.m_Parameters.m_BiasEnabled) { - m_WeightDecoder->Reset(inputs[1]->Map()); - if (m_Data.m_Parameters.m_BiasEnabled) - { - m_BiasDecoder->Reset(inputs[2]->Map()); - } + m_BiasDecoder->Reset(inputs[2]->Map()); } FullyConnected(m_InputShape, -- cgit v1.2.1